├── cars.sqlite
├── README.md
└── cars.jv

/cars.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rashakil-ds/Methods-of-Advanced-Data-Engineering/main/cars.sqlite
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Methods-of-Advanced-Data-Engineering
Methods of Advanced Data Engineering is one of the courses in the department of DATA SCIENCE at the University of Erlangen (FAU)

--------------------------------------------------------------------------------
/cars.jv:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Example 1: Cars
// Learning goals:
// - Understand the core concepts pipeline, block, and pipe
// - Understand the general structure of a pipeline

// CarsPipeline: a Jayvee model of a pipeline that starts at a CSV file
// on the web and ends in a SQLite file sink.
pipeline CarsPipeline {

  // Pipeline structure, stated up front by connecting blocks via pipes.
  // A pipe (`->`) hands the output of the block on its left to the block
  // on its right as input. Chaining the pipes gives an overview of the
  // whole pipeline, from the extractor down to the loader.
  CarsExtractor
    -> CarsTextFileInterpreter
    -> CarsCSVInterpreter
    -> NameHeaderWriter
    -> CarsTableInterpreter
    -> CarsLoader;

  // Definitions of the blocks connected above.
  // Every block instantiates a blocktype through the `oftype` keyword.
  // The blocktype defines which properties the block can set in order to
  // specify its intended behavior.

  // Source block: an HttpExtractor requires no input and produces a
  // binary file as output.
  block CarsExtractor oftype HttpExtractor {
    // Properties are assigned concrete values; this one is the URL the
    // file is downloaded from.
    url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv";
  }

  // The binary file has to be interpreted before further use; here it is
  // interpreted as a text file.
  block CarsTextFileInterpreter oftype TextFileInterpreter { }

  // Interprets the text file as a sheet. A sheet contains only text cells,
  // which makes it useful for reshaping the data before stricter value
  // types are assigned to the cells.
  block CarsCSVInterpreter oftype CSVInterpreter {
    enclosing: '"';
  }

  // The CellWriter blocktype writes into cells of a sheet.
  block NameHeaderWriter oftype CellWriter {
    // Selection of the cells to write, in a syntax similar to spreadsheet
    // programs: the keywords "cell", "row", "column" and "range" describe
    // which cells the write action targets.
    at: cell A1;

    // The value written into each cell selected by "at".
    write: ["name"];
  }

  // Interprets the sheet as a table by adding structure: each column gets
  // a valuetype that specifies the data type of the column. Rows holding
  // values that are not valid for their valuetypes are dropped
  // automatically.
  block CarsTableInterpreter oftype TableInterpreter {
    header: true;
    columns: [
      "name" oftype text,
      "mpg" oftype decimal,
      "cyl" oftype integer,
      "disp" oftype decimal,
      "hp" oftype integer,
      "drat" oftype decimal,
      "wt" oftype decimal,
      "qsec" oftype decimal,
      "vs" oftype integer,
      "am" oftype integer,
      "gear" oftype integer,
      "carb" oftype integer
    ];
  }

  // Final step: load the table into a sink, here a SQLite file. The
  // structural information of the table is used to generate the correct
  // database table.
  block CarsLoader oftype SQLiteLoader {
    file: "./cars.sqlite";
    table: "Cars";
  }

  // Done: the sink can now be used for data analysis, an app, or anything
  // else that needs the cleaned data.
}
--------------------------------------------------------------------------------