├── Shell Scripting ├── readme.md ├── source │ ├── data-sed.csv │ ├── code-scribble.txt │ ├── logfile.log │ ├── welcome.sh │ ├── customer.csv │ ├── if-else.sh │ ├── if-else-args.sh │ ├── sales.csv │ ├── data-source.csv │ └── data-source.json ├── images │ ├── 1-vim.png │ ├── 2-for.png │ ├── 3-awk-1.png │ ├── 3-sed-1.png │ ├── 2-if-else.png │ ├── 2-variables.png │ ├── 2-welcome.png │ ├── 4-data-source.png │ ├── 2-if-else-args.png │ ├── 2-welcome-edit.png │ ├── 1-environment-gitbash.png │ └── 2-if-else-args-loop.png ├── assignment-solutions.png ├── intro.md ├── prerequisite.md ├── session-1.md ├── session-4.md ├── session-2.md └── session-3.md ├── Python Programming ├── hello.py ├── Data Engineering Indonesia.png ├── Introduction to Python Programming.pdf ├── data-warehouse │ └── sales_data_cleaned.csv ├── sources │ └── sales_data.csv ├── python-basic.ipynb └── data-pipeline-breakdown.ipynb └── IntroSQL ├── IntroSQL.pdf ├── images ├── sql_editor.png ├── challenge_output.png ├── full_join_result.png ├── inner_join_result.png ├── left_join_result.png ├── right_join_result.png ├── update_returning.png └── postgreSQL_connection_info.png ├── challenges.md └── README.md /Shell Scripting/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Python Programming/hello.py: -------------------------------------------------------------------------------- 1 | teks = "python sangat mudah dipahami" 2 | print(teks) -------------------------------------------------------------------------------- /IntroSQL/IntroSQL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/IntroSQL.pdf -------------------------------------------------------------------------------- /Shell Scripting/source/data-sed.csv: 
-------------------------------------------------------------------------------- 1 | Nama, Usia, Kota 2 | Yusuf,17,New York 3 | Zalmawati,25,Perth 4 | Reza,28,Bekasi -------------------------------------------------------------------------------- /IntroSQL/images/sql_editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/sql_editor.png -------------------------------------------------------------------------------- /Shell Scripting/images/1-vim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/1-vim.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-for.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-for.png -------------------------------------------------------------------------------- /Shell Scripting/images/3-awk-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/3-awk-1.png -------------------------------------------------------------------------------- /Shell Scripting/images/3-sed-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/3-sed-1.png -------------------------------------------------------------------------------- /IntroSQL/images/challenge_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/challenge_output.png 
-------------------------------------------------------------------------------- /IntroSQL/images/full_join_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/full_join_result.png -------------------------------------------------------------------------------- /IntroSQL/images/inner_join_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/inner_join_result.png -------------------------------------------------------------------------------- /IntroSQL/images/left_join_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/left_join_result.png -------------------------------------------------------------------------------- /IntroSQL/images/right_join_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/right_join_result.png -------------------------------------------------------------------------------- /IntroSQL/images/update_returning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/update_returning.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-if-else.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-variables.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-variables.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-welcome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-welcome.png -------------------------------------------------------------------------------- /Shell Scripting/assignment-solutions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/assignment-solutions.png -------------------------------------------------------------------------------- /Shell Scripting/images/4-data-source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/4-data-source.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-if-else-args.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else-args.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-welcome-edit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-welcome-edit.png -------------------------------------------------------------------------------- /IntroSQL/images/postgreSQL_connection_info.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/postgreSQL_connection_info.png -------------------------------------------------------------------------------- /Shell Scripting/images/1-environment-gitbash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/1-environment-gitbash.png -------------------------------------------------------------------------------- /Shell Scripting/images/2-if-else-args-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else-args-loop.png -------------------------------------------------------------------------------- /Python Programming/Data Engineering Indonesia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Python Programming/Data Engineering Indonesia.png -------------------------------------------------------------------------------- /Python Programming/Introduction to Python Programming.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Python Programming/Introduction to Python Programming.pdf -------------------------------------------------------------------------------- /Shell Scripting/source/code-scribble.txt: -------------------------------------------------------------------------------- 1 | name="Waskito Pringgohandoko" \ 2 | age=40 \ 3 | height=180.5 \ 4 | fave_fruits=("banana" "apple" "kiwi" "jackfruit") \ 5 | this_is_true=1 \ 6 | this_is_false=0 \ 7 | null_var= 8 | 9 | -------------------------------------------------------------------------------- /Shell Scripting/source/logfile.log: 
-------------------------------------------------------------------------------- 1 | 2022-01-01,INFO,User logged in 2 | 2022-01-01,ERROR,Invalid input detected 3 | 2022-01-02,WARNING,Disk space low 4 | 2022-01-02,ERROR,Database connection failed 5 | 2022-01-03,INFO,User logged out -------------------------------------------------------------------------------- /Shell Scripting/source/welcome.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is a sample shell script 4 | # It displays a welcome message and the current date 5 | 6 | echo "Welcome to the Shell Scripting Course!" 7 | echo "Today's date is: $(date)" 8 | -------------------------------------------------------------------------------- /Shell Scripting/source/customer.csv: -------------------------------------------------------------------------------- 1 | customer_code,address,customer_name 2 | CUST001,123 Main Street,John Smith 3 | CUST002,456 Oak Avenue,Jane Doe 4 | CUST003,789 Elm Lane,Michael Johnson 5 | CUST004,321 Pine Road,Sarah Wilson 6 | CUST005,987 Maple Drive,Emily Brown 7 | -------------------------------------------------------------------------------- /Shell Scripting/source/if-else.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script demonstrates conditional statements 4 | 5 | read -p "Enter a number: " num 6 | 7 | if [ $num -gt 10 ]; then 8 | echo "The number is greater than 10" 9 | else 10 | echo "The number is less than or equal to 10" 11 | fi 12 | -------------------------------------------------------------------------------- /Shell Scripting/source/if-else-args.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script demonstrates conditional statements 4 | 5 | # read -p "Enter a number: " num 6 | 7 | if [ $1 -gt 10 ]; then 8 | echo "The number is greater than 10" 9 | else 10 | echo "The 
number is less than or equal to 10" 11 | fi 12 | -------------------------------------------------------------------------------- /Shell Scripting/source/sales.csv: -------------------------------------------------------------------------------- 1 | date,product,quantity,price,customer_id 2 | 2022-01-01,Shirt,10,25.00,CUST001 3 | 2022-01-01,Pants,5,40.00,CUST002 4 | 2022-01-02,Shoes,2,80.00,CUST003 5 | 2022-01-03,Hat,3,15.00,CUST004 6 | 2022-01-03,Shirt,7,25.00,CUST005 7 | 2022-01-04,Shoes,1,80.00,CUST001 8 | 2022-01-04,Pants,4,40.00,CUST002 9 | -------------------------------------------------------------------------------- /IntroSQL/challenges.md: -------------------------------------------------------------------------------- 1 | ## Challenge Question 2 | Display the top 10 rows of product, order date, and sum of total price, for products whose category contains tea and breads and whose total price is more than 1000, sorted by the highest sum of total price. 3 | 
4 | Display the following output columns: product name, order date, and sum of total price. 5 | 
6 | Example output : 7 | ![example output](./images/challenge_output.png) 8 | -------------------------------------------------------------------------------- /Shell Scripting/source/data-source.csv: -------------------------------------------------------------------------------- 1 | Nama,Umur 2 | Arnetha_Marchelina,25 3 | Sebastian_Cahyo_Ardhi_Iswara,30 4 | Helmi_Aziz_Muhammad,22 5 | Muhammad_Ridwan_Maulana,28 6 | Arif_Setiyawan,24 7 | Rahmatulloh,31 8 | Aidiel_Fitra,23 9 | Yusuf_Hanafi_Angkat,26 10 | Isharridho_Pratama,27 11 | Vincent_Junitio_Ungu,29 12 | Hairulloh_Sukur,33 13 | Yuandika_Alfahreiza,20 14 | Sahala_Josua_Sinaga,34 15 | Septian_Dwi_Kurnia,21 16 | Fitra_Anugrah,32 17 | Muhammad_Ridwan_Maulana,35 -------------------------------------------------------------------------------- /Shell Scripting/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | 4 | My name is `Waskito Pringgohandoko`, you can call me **Kikit**. 5 | - I was born in the 80’s. 6 | - I started to scratch the surface of data engineering about a year ago. 7 | 8 | I’m here to share, and I hope you guys will enjoy this event by sharing your thoughts, concerns, and questions, and we’ll do our best to address them. 9 |
10 |
11 | # What will we do today? 12 | 13 | ## Have fun! 14 | While doing it, we will work through these hands-on exercises: 15 | - Create your first shell script and run it 16 | - Make the script do things across multiple use cases 17 | - Enhance the script using variables and simple programming flows 18 | - Automate the script 19 | 
20 |
21 | ## Resources 22 | 23 | The scripts are provided in the /source directory 24 | 25 | -------------------------------------------------------------------------------- /Shell Scripting/prerequisite.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | ## Device 4 | - Bring your own laptop 5 | 6 | ## Application 7 | - Built-in Command Line Interface (Linux / MacOS) or 'terminal' 8 | - GitBash (Windows) - download [here](https://gitforwindows.org/) 9 | - Your favorite text editor such as nano/vim/Notepad++/etc. For reference, the hands-on will be using `vim`. 10 | 11 | ## Test your environment 12 | 1. Open your terminal or open GitBash 13 | 2. Open your text editor using their command, e.g. 14 | - Vim: `$ vim` 15 | - Nano: `$ nano` 16 | 3. Open another terminal/GitBash session, make sure you can run these commands: 17 | - `$ grep` 18 | - `$ sed` 19 | - `$ awk` 20 | 21 | If something like this turns up: 22 | 23 | ```shell 24 | $ awk 25 | bash: awk: command not found 26 | ``` 27 | it means your environment is missing that command. Troubleshoot and test again. -------------------------------------------------------------------------------- /Shell Scripting/session-1.md: -------------------------------------------------------------------------------- 1 | --- 2 | marp: true 3 | --- 4 | 5 | # Let’s start! 6 | 7 | ## Shell Script. What is it? 8 | 9 | In the Data Engineering context, a shell script is a powerful tool that enables us to interact with the command-line interface (CLI) of our operating system and execute a series of commands in a sequential manner. It allows us to `automate tasks` and `streamline our data workflows`. 10 | 11 | We will use a shell script called `bash`, which is widely used in the industry and offers a rich set of features and functionalities for scripting purposes. 12 | 13 | --- 14 | 15 | ## Understand your Shell Environment. 
16 | 17 | - A Terminal: Linux or Mac users will be using your default terminal app, while Windows users can use GitBash for the terminal. 18 | ![a Terminal](images/1-environment-gitbash.png) 19 | --- 20 | 21 | ## Let's try it using some basic commands 22 | * $ ls 23 | * $ pwd 24 | * $ cd 25 | * $ echo 26 | 27 | --- 28 | - A Text Editor: It’s really a weapon of choice, you can use your favorite text editor. 29 | We will use `vim` for this course, please adjust it to your preferred text editor. 30 | ![vim interface](images/1-vim.png) 31 | 32 | --- 33 | 34 | - Shell Scripting libraries: 35 | - grep 36 | - awk 37 | - sed 38 | - etc. 39 | 40 | --- -------------------------------------------------------------------------------- /Shell Scripting/source/data-source.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Nama": "Arnetha_Marchelina", 4 | "Umur": 25 5 | }, 6 | { 7 | "Nama": "Sebastian_Cahyo_Ardhi_Iswara", 8 | "Umur": 30, 9 | "alamat": { 10 | "kota": "New York", 11 | "kodepos": "10001" 12 | }, 13 | "hobbies": ["reading", "painting", "hiking"] 14 | }, 15 | { 16 | "Nama": "Helmi_Aziz_Muhammad", 17 | "Umur": 22 18 | }, 19 | { 20 | "Nama": "Muhammad_Ridwan_Maulana", 21 | "Umur": 28, 22 | "alamat": { 23 | "kota": "Jakarta", 24 | "kodepos": "00892" 25 | }, 26 | "hobbies": ["swimming", "dancing"] 27 | }, 28 | { 29 | "Nama": "Arif_Setiyawan", 30 | "Umur": 24 31 | }, 32 | { 33 | "Nama": "Rahmatulloh", 34 | "Umur": 31 35 | }, 36 | { 37 | "Nama": "Aidiel_Fitra", 38 | "Umur": 23 39 | }, 40 | { 41 | "Nama": "Yusuf_Hanafi_Angkat", 42 | "Umur": 26 43 | }, 44 | { 45 | "Nama": "Isharridho_Pratama", 46 | "Umur": 27 47 | }, 48 | { 49 | "Nama": "Vincent_Junitio_Ungu", 50 | "Umur": 29 51 | }, 52 | { 53 | "Nama": "Hairulloh_Sukur", 54 | "Umur": 33 55 | }, 56 | { 57 | "Nama": "Yuandika_Alfahreiza", 58 | "Umur": 20 59 | }, 60 | { 61 | "Nama": "Sahala_Josua_Sinaga", 62 | "Umur": 34, 63 | "alamat": { 64 | "kota": "Bangalore", 65 | 
"kodepos": "4435AA" 66 | }, 67 | "hobbies": ["cooking", "photography"] 68 | }, 69 | { 70 | "Nama": "Septian_Dwi_Kurnia", 71 | "Umur": 21 72 | }, 73 | { 74 | "Nama": "Fitra_Anugrah", 75 | "Umur": 32 76 | }, 77 | { 78 | "Nama": "Muhammad_Ridwan_Maulana", 79 | "Umur": 35 80 | } 81 | ] -------------------------------------------------------------------------------- /Shell Scripting/session-4.md: -------------------------------------------------------------------------------- 1 | --- 2 | marp: true 3 | --- 4 | 5 | # What about JSON files? 6 | --- 7 | # Have you heard of `jq` library? 8 | - It's a powerful command-line utility and library for processing JSON data in Unix-like environments. 9 | - designed to work with JSON data streams and provides a wide range of functionalities for data filtering, formatting, and cleansing. 10 | - `jq` allows you to extract specific data from JSON, manipulate JSON structures, and perform various transformations on the data. 11 | 12 | --- 13 | 14 | # Let's learn together 15 | 16 | --- 17 | 18 | - We will use the same data as before, but in JSON format 19 | ![data-source][data-source] 20 | 21 | --- 22 | # Navigating a JSON file 23 | ## as simple as `jq 'QUERY' data-source.json` 24 | - To view the entire JSON data: 25 | `jq '.' 
data-source.json` 26 | - To select and display only the "Nama" and "Umur" fields of all records: 27 | `jq '.[] | {Nama, Umur}' data-source.json` 28 | - To filter records with an age greater than 25: 29 | `jq '.[] | select(.Umur > 25)' data-source.json` 30 | - To extract "Nama" and "Umur" fields of records with hobbies: 31 | `jq '.[] | select(.hobbies) | {Nama, Umur}' data-source.json` 32 | --- 33 | - To display the "kota" (city) field from records that have the "alamat" (address) field: 34 | `jq '.[] | select(.alamat) | .alamat.kota' data-source.json` 35 | - To count the number of records: 36 | `jq 'length' data-source.json` 37 | - To find the maximum and minimum age: 38 | `jq 'max_by(.Umur) | .Umur' data-source.json` 39 | `jq 'min_by(.Umur) | .Umur' data-source.json` 40 | - To extract "Nama" and "Umur" fields and format as "Nama berusia Umur" 41 | `jq '.[] | "\(.Nama) berusia \(.Umur)"' data-source.json` 42 | --- 43 | 44 | ## The `map` directive 45 | - used to apply a filter or function to each element of an array. It allows you to perform data transformations or filtering on arrays of JSON objects. 46 | - Let's try to filter records with `Umur` greater than 30 47 | `jq 'map(select(.Umur > 30))' data-source.json` 48 | - Now, we replace the underscores in `Nama` with spaces 49 | `jq 'map(.Nama |= gsub("_"; " "))' data-source.json` 50 | 51 | --- 52 | 53 | ## the `sort` directive 54 | - The `sort` function along with the `reverse` function in `jq` are used to sort the JSON data in descending order based on a specific field. The `sort` function sorts the JSON data in ascending order, and then the `reverse` function is used to reverse the order and obtain the descending sort. 55 | `jq 'sort_by(.Umur) | reverse' data-source.json` 56 | 57 | --- 58 | 59 | ## [assignment] Data Processing 60 | ### Tasks 61 | - Using the `data-source.json` file 62 | - Perform data cleansing to replace underscores in the `Nama` column with spaces. 
63 | - Perform data extraction to format the output as `'Nama' berusia 'Umur'`. 64 | - Filter and group ages below-25 and 25-and-above. 65 | - Combine those commands into a single shell script file. 66 | 67 | [data-source]: images/4-data-source.png -------------------------------------------------------------------------------- /Shell Scripting/session-2.md: -------------------------------------------------------------------------------- 1 | --- 2 | marp: true 3 | --- 4 | 5 | # Let's learn 6 | --- 7 | 8 | 9 | # Our first scripts of the day 10 | 11 | ![welcome][welcome] 12 | 13 | --- 14 | 15 | ## welcome.sh 16 | - Explanation 17 | - shebang `#!/bin/bash` 18 | it is used to specify which shell interpreter will run the script. 19 | - comment `# this is a comment` 20 | anything after a '#' will not be run as a command. 21 | - echo 22 | we call this command to display output on the terminal, just like the one we used before. 23 | - date 24 | this command is used to retrieve the current system date and time 25 | --- 26 | ## Execute our script! 27 | - make it `executable` using this command: `$ chmod +x welcome.sh` 28 | - run it using this command: `$ ./welcome.sh` 29 | --- 30 | ## [assignment] welcome-edit.sh 31 | 32 | - copy the welcome.sh into a new file called `welcome-edit.sh` 33 | - modify the new file so the output is similar to this (5 minutes)
34 | 35 | ![welcome-edit][welcome-edit] 36 | 37 | --- 38 | 39 | ## Variables and data types 40 | - In shell scripting, variables are used to store data or values that can be accessed and manipulated throughout the script. Variables can hold various types of data, and the data type is determined implicitly based on the context in which the variable is used. Unlike some programming languages, shell scripting is not strongly typed, meaning you don't need to explicitly define the data type when declaring a variable. 41 | 42 | --- 43 | 44 | - Examples: 45 | - String: A sequence of characters enclosed in single or double quotes. Strings are the most commonly used data type in shell scripting. 46 | - Integer: Whole numbers without any decimal points. 47 | - Floating-Point: Numbers with decimal points. 48 | - Array: A collection of values accessible by their indices. Shell scripting supports one-dimensional arrays. 49 | - Boolean: Shell scripting doesn't have a native Boolean data type. Instead, booleans are represented using integer values, where 0 represents false, and any non-zero value represents true. 50 | - Null: Represents the absence of a value or an undefined variable. 51 | --- 52 | 53 | - It's important to note that shell scripting is loosely typed, meaning variables can change their data type during execution. For example, a variable that initially stores an integer value can later hold a string value without any explicit type conversion. 54 | 55 | - In shell scripts, you can access the value of a variable using the dollar sign `$` followed by the variable name. For example, `$name` will give you the value of the name variable, and `${fruits[0]}` will give you the first element of the `fruits` array. 56 | --- 57 | - Check this out:
58 | ![variables][variables] 59 | 60 | - Try to output each one with `$ echo $[variable name]`!! 61 | --- 62 | ## Basic control structures 63 | ### if-else 64 | - In this script, we prompt the user to enter a number using the read command. Then, we use an if-else construct to check if the number is greater than 10. If the condition is true, we display a message indicating that the number is greater than 10. Otherwise, we display a message indicating that the number is less than or equal to 10. 65 | ![if-else][if-else] 66 | 67 | --- 68 | 69 | ### if-else-args 70 | - copy the if-else.sh into a new file called `if-else-args.sh` 71 | - comment out the 5th line 72 | - change the 7th line into this: `if [ $1 -gt 10 ]; then` 73 | 74 | ![if-else-args][if-else-args] 75 | 76 | 77 | --- 78 | ### for 79 | - In this script, we use a for loop to iterate over a sequence of numbers from 1 to 5. Within each iteration, we display a message indicating the current iteration number. 80 | - Loops allow us to automate repetitive tasks, such as processing multiple files, iterating over database records, or performing calculations on a range of values. They enhance the efficiency and productivity of our data engineering workflows. 81 | 82 | ![for][for] 83 | 84 | --- 85 | 86 | ### [assignment] if-else-args-loop 87 | - copy the if-else-args.sh into a new file called `if-else-args-loop.sh` 88 | - modify the new file so the output is similar to this (10 minutes)
89 | - hint: use `"$@"` to loop over arguments 90 | 91 | ![if-else-args-loop][if-else-args-loop] 92 | 93 | [welcome]: images/2-welcome.png 94 | [welcome-edit]: images/2-welcome-edit.png 95 | [if-else-args-loop]: images/2-if-else-args-loop.png 96 | [variables]: images/2-variables.png 97 | [if-else]: images/2-if-else.png 98 | [if-else-args]: images/2-if-else-args.png 99 | [for]: images/2-for.png 100 | 101 | -------------------------------------------------------------------------------- /Shell Scripting/session-3.md: -------------------------------------------------------------------------------- 1 | --- 2 | marp: true 3 | --- 4 | 5 | # Moving on! 6 | - Let's try to do some data processing! 7 | - make sure `sed`, `grep`, and `awk` are available in your environment 8 | - these text-based tools are quick and efficient, even on large amounts of text 9 | --- 10 | 11 | # Let's learn 12 | 13 | --- 14 | 15 | ## The Stream Editor `sed` 16 | - Allows you to perform text transformations on an input stream (a file or input from a pipeline) and then output the modified stream. It is commonly used for search and replace operations, among other text manipulations. 17 | 18 | --- 19 | 20 | - Let's use this data and save it to a file called `data-sed.csv` 21 | ``` 22 | Nama, Usia, Kota 23 | Yusuf,17,New York 24 | Zalmawati,25,Perth 25 | Reza,28,Bekasi 26 | ``` 27 | 28 | - Our goal is to replace "Perth" with "Jakarta" in the Kota column. We can achieve this using the `sed` command as follows:
29 | `$ sed 's/Perth/Jakarta/' data-sed.csv > modified_data.csv` 30 | 31 | --- 32 | 33 | - Explanation: 34 | - `sed`: Invokes the sed command. 35 | - `s/Perth/Jakarta/`: This is the search and replace pattern. The `s` stands for substitute, and we're searching for "Perth" and replacing it with "Jakarta". 36 | - `data-sed.csv`: Specifies the input file, i.e., the file to perform the search and replace operation on. 37 | - `> modified_data.csv`: Redirects the modified output to a new file named `modified_data.csv`. 38 | 39 | --- 40 | 41 | - Pro Tip: 42 | - You can also pipe the output of any command into `sed` to change values on the fly 43 | - You can use `regex` to perform even more powerful `search and replace` operations 44 | 45 | --- 46 | 47 | ## The Data Extractor `awk` 48 | - Operates on a per-line basis, reading input line by line and applying patterns and actions to process the data. `awk` excels at working with structured data, making it a great choice for parsing CSV files and performing various data manipulations. 49 | 50 | --- 51 | - Let's use the previous data file and copy it to a new file called `data-awk.csv` 52 | - We will try to extract the data using the format that we want:
`$ awk -F ',' 'NR > 1 {print $1 " berusia " $2 " tahun."}' data-awk.csv`
53 | 54 | ![awk-1][awk-1] 55 | 56 | --- 57 | - Explanation: 58 | - `-F ','`: This option specifies the field separator for awk. In this case, we set it to ',' to indicate that the CSV file uses commas as separators between fields. 59 | - `'...'`: The single quotes enclose the awk program. Inside it, the pattern `NR > 1` skips the first line (the header row), so the action only runs on the data rows. 60 | - `print $1 " berusia " $2 " tahun."`: This is the action part of the `awk` program. It prints the desired output, which consists of the first field ($1, representing the `Nama` column) and the second field ($2, representing the `Usia` column). 61 | - In this example, `awk` processes each data line of the CSV file and prints the extracted information in the desired format. 62 | 63 | --- 64 | - Pro tip 65 | - `awk` can also perform conditional statements, loops, and calculations on data 66 | - You can use `regex` to perform even more powerful `data transformation` and `data cleaning` operations 67 | --- 68 | ## The log file's best friend `grep` 69 | - Perfect for searching and filtering text based on patterns, including regular expressions. 70 | - It is commonly used to extract specific lines or patterns from files that match a given search criteria. 71 | 72 | --- 73 | - Let's use this data and save it to a file called `logfile.log` 74 | ``` 75 | 2022-01-01,INFO,User logged in 76 | 2022-01-01,ERROR,Invalid input detected 77 | 2022-01-02,WARNING,Disk space low 78 | 2022-01-02,ERROR,Database connection failed 79 | 2022-01-03,INFO,User logged out 80 | ``` 81 | 82 | - find all lines containing `ERROR` in the log file using this command: 83 | `$ grep "ERROR" logfile.log` 84 | 85 | - Explanation: 86 | - `grep "ERROR"`: This command searches for lines containing the word `ERROR` in the file `logfile.log` and prints those lines. 
87 | 88 | --- 89 | - Now, let's use a regular expression to find lines that start with `2022-01-01`: 90 | `$ grep "^2022-01-01" logfile.log` 91 | - Explanation: 92 | - `grep "^2022-01-01"`: This command searches for lines that start with `2022-01-01` in the file `logfile.log` and prints those lines. 93 | 94 | --- 95 | ## [assignment] Data Processing 96 | ### Use this csv file, save it as `data-source.csv` 97 | ``` 98 | Nama,Umur 99 | Arnetha_Marchelina,25 100 | Sebastian_Cahyo_Ardhi_Iswara,30 101 | Helmi_Aziz_Muhammad,22 102 | Muhammad_Ridwan_Maulana,28 103 | Arif_Setiyawan,24 104 | Rahmatulloh,31 105 | Aidiel_Fitra,23 106 | Yusuf_Hanafi_Angkat,26 107 | Isharridho_Pratama,27 108 | Vincent_Junitio_Ungu,29 109 | Hairulloh_Sukur,33 110 | Yuandika_Alfahreiza,20 111 | Sahala_Josua_Sinaga,34 112 | Septian_Dwi_Kurnia,21 113 | Fitra_Anugrah,32 114 | Muhammad_Ridwan_Maulana,35 115 | ``` 116 | --- 117 | ### Tasks 118 | - Perform data cleansing using `sed` to replace underscores in the `Nama` column with spaces. 119 | - Perform data extraction using `awk` to format the output as `'Nama' berusia 'Umur'`. 120 | - Use `grep` to filter and group ages below-25 and 25-and-above. 121 | - Combine those commands into a single shell script file. 122 | - Clean your code, give comments, and output echoes as much as you can to help you test and troubleshoot. 123 | - Good luck! 
124 | 125 | [sed-1]: images/3-sed-1.png 126 | [awk-1]: images/3-awk-1.png -------------------------------------------------------------------------------- /Python Programming/data-warehouse/sales_data_cleaned.csv: -------------------------------------------------------------------------------- 1 | country,type,channel,priority,date,id,units_sold,unit_price 2 | Tuvalu,Baby Food,Offline,H,2010-05-28,669165933,9925.0,255.28 3 | Sao Tome and Principe,Fruits,Online,C,2014-06-20,514321792,8102.0,9.33 4 | Angola,Household,Offline,M,2011-04-23,135425221,4187.0,668.27 5 | Burkina Faso,Vegetables,Online,H,2012-07-17,871543967,8082.0,154.06 6 | Republic of the Congo,Personal Care,Offline,M,2015-07-14,770463311,6070.0,81.73 7 | Senegal,Cereal,Online,H,2014-04-18,616607081,6593.0,205.7 8 | Kyrgyzstan,Vegetables,Online,H,2011-06-24,814711606,124.0,154.06 9 | Cape Verde,Clothes,Offline,H,2014-08-02,939825713,4168.0,109.28 10 | Bangladesh,Clothes,Online,L,2017-01-13,187310731,8263.0,109.28 11 | Honduras,Household,Offline,H,2017-02-08,522840487,8974.0,668.27 12 | Mongolia,Personal Care,Offline,C,2014-02-19,832401311,4901.0,81.73 13 | Bulgaria,Clothes,Online,M,2012-04-23,972292029,1673.0,109.28 14 | Sri Lanka,Cosmetics,Offline,M,2016-11-19,419123971,6952.0,437.2 15 | Cameroon,Beverages,Offline,C,2015-04-01,519820964,5430.0,47.45 16 | Turkmenistan,Household,Offline,L,2010-12-30,441619336,3830.0,668.27 17 | East Timor,Meat,Online,L,2012-07-31,322067916,5908.0,421.89 18 | Norway,Baby Food,Online,L,2014-05-14,819028031,7450.0,255.28 19 | Portugal,Baby Food,Online,H,2015-07-31,860673511,1273.0,255.28 20 | Honduras,Snacks,Online,L,2016-06-30,795490682,2225.0,152.58 21 | New Zealand,Fruits,Online,H,2014-09-08,142278373,2187.0,9.33 22 | Moldova ,Personal Care,Online,L,2016-05-07,740147912,5070.0,81.73 23 | France,Cosmetics,Online,H,2017-05-22,898523128,1815.0,437.2 24 | Kiribati,Fruits,Online,M,2014-10-13,347140347,5398.0,9.33 25 | Mali,Fruits,Online,L,2010-05-07,686048400,5822.0,9.33 26 | 
Norway,Beverages,Offline,C,2014-07-18,435608613,5124.0,47.45 27 | The Gambia,Household,Offline,L,2012-05-26,886494815,2370.0,668.27 28 | Switzerland,Cosmetics,Offline,M,2012-09-17,249693334,8661.0,437.2 29 | South Sudan,Personal Care,Offline,C,2013-12-29,406502997,2125.0,81.73 30 | Australia,Office Supplies,Online,C,2015-10-27,158535134,2924.0,651.21 31 | Myanmar,Household,Offline,H,2015-01-16,177713572,8250.0,668.27 32 | Djibouti,Snacks,Online,M,2017-02-25,756274640,7327.0,152.58 33 | Costa Rica,Personal Care,Offline,L,2017-05-08,456767165,6409.0,81.73 34 | Syria,Fruits,Online,L,2011-11-22,162052476,3784.0,9.33 35 | The Gambia,Meat,Online,M,2017-01-14,825304400,4767.0,421.89 36 | Brunei,Office Supplies,Online,L,2012-04-01,320009267,6708.0,651.21 37 | Bulgaria,Office Supplies,Online,M,2012-02-16,189965903,3987.0,651.21 38 | Niger,Personal Care,Online,H,2017-03-11,699285638,3015.0,81.73 39 | Azerbaijan,Cosmetics,Online,M,2010-02-06,382392299,7234.0,437.2 40 | The Gambia,Cereal,Offline,H,2012-06-07,994022214,2117.0,205.7 41 | Slovakia,Vegetables,Online,H,2012-10-06,759224212,171.0,154.06 42 | Myanmar,Clothes,Online,H,2015-11-14,223359620,5930.0,109.28 43 | Comoros,Cereal,Offline,H,2016-03-29,902102267,962.0,205.7 44 | Iceland,Cosmetics,Online,C,2016-12-31,331438481,8867.0,437.2 45 | Switzerland,Personal Care,Online,M,2010-12-23,617667090,273.0,81.73 46 | Macedonia,Clothes,Offline,C,2014-10-14,787399423,7842.0,109.28 47 | Mauritania,Office Supplies,Offline,C,2012-01-11,837559306,1266.0,651.21 48 | Albania,Clothes,Online,C,2010-02-02,385383069,2269.0,109.28 49 | Lesotho,Fruits,Online,L,2013-08-18,918419539,9606.0,9.33 50 | Saudi Arabia,Cereal,Online,M,2013-03-25,844530045,4063.0,205.7 51 | Sierra Leone,Office Supplies,Offline,M,2011-11-26,441888415,3457.0,651.21 52 | Sao Tome and Principe,Fruits,Offline,H,2013-09-17,508980977,7637.0,9.33 53 | Cote d'Ivoire,Clothes,Online,C,2012-06-08,114606559,3482.0,109.28 54 | Fiji,Clothes,Offline,C,2010-06-30,647876489,9905.0,109.28 
55 | Austria,Cosmetics,Offline,H,2015-02-23,868214595,2847.0,437.2 56 | United Kingdom,Household,Online,L,2012-01-05,955357205,282.0,668.27 57 | Djibouti,Cosmetics,Offline,H,2014-04-07,259353148,7215.0,437.2 58 | Australia,Cereal,Offline,H,2013-06-09,450563752,682.0,205.7 59 | San Marino,Baby Food,Online,L,2013-06-26,569662845,4750.0,255.28 60 | Cameroon,Office Supplies,Online,M,2011-11-07,177636754,5518.0,651.21 61 | Libya,Clothes,Offline,H,2010-10-30,705784308,6116.0,109.28 62 | Haiti,Cosmetics,Offline,H,2013-10-13,505716836,1705.0,437.2 63 | Rwanda,Cosmetics,Offline,H,2013-10-11,699358165,4477.0,437.2 64 | Gabon,Personal Care,Offline,L,2012-07-08,228944623,8656.0,81.73 65 | Belize,Clothes,Offline,M,2016-07-25,807025039,5498.0,109.28 66 | Lithuania,Office Supplies,Offline,H,2010-10-24,166460740,8287.0,651.21 67 | Madagascar,Clothes,Offline,L,2015-04-25,610425555,7342.0,109.28 68 | Turkmenistan,Office Supplies,Online,M,2013-04-23,462405812,5010.0,651.21 69 | Libya,Fruits,Online,L,2015-08-14,816200339,673.0,9.33 70 | Democratic Republic of the Congo,Beverages,Online,C,2011-05-26,585920464,5741.0,47.45 71 | Djibouti,Cereal,Online,H,2017-05-20,555990016,8656.0,205.7 72 | Pakistan,Cosmetics,Offline,L,2013-07-05,231145322,9892.0,437.2 73 | Mexico,Household,Offline,C,2014-11-06,986435210,6954.0,668.27 74 | Federated States of Micronesia,Beverages,Online,C,2014-10-28,217221009,9379.0,47.45 75 | Laos,Vegetables,Offline,C,2011-09-15,789176547,3732.0,154.06 76 | Monaco,Baby Food,Offline,H,2012-05-29,688288152,8614.0,255.28 77 | Samoa ,Cosmetics,Online,H,2013-07-20,670854651,9654.0,437.2 78 | Spain,Household,Offline,L,2012-10-21,213487374,4513.0,668.27 79 | Lebanon,Clothes,Online,L,2012-09-18,663110148,7884.0,109.28 80 | Iran,Cosmetics,Online,H,2016-11-15,286959302,6489.0,437.2 81 | Zambia,Snacks,Online,L,2011-01-04,122583663,4085.0,152.58 82 | Kenya,Vegetables,Online,L,2012-03-18,827844560,6457.0,154.06 83 | Mexico,Personal Care,Offline,L,2012-02-17,430915820,6422.0,81.73 
84 | Sao Tome and Principe,Beverages,Offline,C,2011-01-16,180283772,8829.0,47.45 85 | The Gambia,Baby Food,Offline,M,2014-02-03,494747245,5559.0,255.28 86 | Kuwait,Fruits,Online,M,2012-04-30,513417565,522.0,9.33 87 | Slovenia,Beverages,Offline,C,2016-10-23,345718562,4660.0,47.45 88 | Sierra Leone,Office Supplies,Offline,H,2016-12-06,621386563,948.0,651.21 89 | Australia,Beverages,Offline,H,2014-07-07,240470397,9389.0,47.45 90 | Azerbaijan,Office Supplies,Online,M,2012-06-13,423331391,2021.0,651.21 91 | Romania,Cosmetics,Online,H,2010-11-26,660643374,7910.0,437.2 92 | Nicaragua,Beverages,Offline,C,2011-02-08,963392674,8156.0,47.45 93 | Mali,Clothes,Online,M,2011-07-26,512878119,888.0,109.28 94 | Malaysia,Fruits,Offline,L,2011-11-11,810711038,6267.0,9.33 95 | Sierra Leone,Vegetables,Offline,C,2016-06-01,728815257,1485.0,154.06 96 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767.0,81.73 97 | Mozambique,Household,Offline,L,2012-02-10,665095412,5367.0,668.27 98 | Indonesia,Fruits,Online,H,2012-08-22,669165412,97.0,9925.0 99 | -------------------------------------------------------------------------------- /Python Programming/sources/sales_data.csv: -------------------------------------------------------------------------------- 1 | country,type,channel,priority,date,id,units_sold,unit_price 2 | Tuvalu,Baby Food,Offline,H,2010-05-28,669165933,9925,255.28 3 | Grenada,Cereal,Online,C,2012-08-22,963881480,,205.7 4 | Russia,,Offline,L,2014-05-02,341417157,1779,651.21 5 | Sao Tome and Principe,Fruits,Online,C,2014-06-20,514321792,8102,9.33 6 | Rwanda,Office Supplies,Offline,L,,115456712,5062,651.21 7 | ,Baby Food,Online,C,2015-02-04,547995746,2974,255.28 8 | Angola,Household,Offline,M,2011-04-23,135425221,4187,668.27 9 | Burkina Faso,Vegetables,Online,H,2012-07-17,871543967,8082,154.06 10 | Republic of the Congo,Personal Care,Offline,M,2015-07-14,770463311,6070,81.73 11 | Senegal,Cereal,Online,H,2014-04-18,616607081,6593,205.7 12 | 
Kyrgyzstan,Vegetables,Online,H,2011-06-24,814711606,124,154.06 13 | Cape Verde,Clothes,Offline,H,2014-08-02,939825713,4168,109.28 14 | Bangladesh,Clothes,Online,L,2017-01-13,187310731,8263,109.28 15 | Honduras,Household,Offline,H,2017-02-08,522840487,8974,668.27 16 | Mongolia,Personal Care,Offline,C,2014-02-19,832401311,4901,81.73 17 | Bulgaria,Clothes,Online,M,2012-04-23,972292029,1673,109.28 18 | Sri Lanka,Cosmetics,Offline,M,2016-11-19,419123971,6952,437.2 19 | Cameroon,Beverages,Offline,C,2015-04-01,519820964,5430,47.45 20 | Turkmenistan,Household,Offline,L,2010-12-30,441619336,3830,668.27 21 | East Timor,Meat,Online,L,2012-07-31,322067916,5908,421.89 22 | Norway,Baby Food,Online,L,2014-05-14,819028031,7450,255.28 23 | Portugal,Baby Food,Online,H,2015-07-31,860673511,1273,255.28 24 | Honduras,Snacks,Online,L,2016-06-30,795490682,2225,152.58 25 | New Zealand,Fruits,Online,H,2014-09-08,142278373,2187,9.33 26 | Moldova ,Personal Care,Online,L,2016-05-07,740147912,5070,81.73 27 | France,Cosmetics,Online,H,2017-05-22,898523128,1815,437.2 28 | Kiribati,Fruits,Online,M,2014-10-13,347140347,5398,9.33 29 | Mali,Frutis,Online,L,2010-05-07,686048400,5822,9.33 30 | Norway,Beverages,Offline,C,2014-07-18,435608613,5124,47.45 31 | The Gambia,Household,Offline,L,2012-05-26,886494815,2370,668.27 32 | Switzerland,Cosmetics,Offline,M,2012-09-17,249693334,8661,437.2 33 | South Sudan,Personal Care,Offline,C,2013-12-29,406502997,2125,81.73 34 | Australia,Office Supplies,Online,C,2015-10-27,158535134,2924,651.21 35 | Myanmar,Household,Offline,H,2015-01-16,177713572,8250,668.27 36 | Djibouti,Snacks,Online,M,2017-02-25,756274640,7327,152.58 37 | Costa Rica,Personal Care,Offline,L,2017-05-08,456767165,6409,81.73 38 | Syria,Fruits,Online,L,2011-11-22,162052476,3784,9.33 39 | The Gambia,Meat,Online,M,2017-01-14,825304400,4767,421.89 40 | Brunei,Office Supplies,Online,L,2012-04-01,320009267,6708,651.21 41 | Bulgaria,Office Supplies,Online,M,2012-02-16,189965903,3987,651.21 42 | 
Niger,Personal Care,Online,H,2017-03-11,699285638,3015,81.73 43 | Azerbaijan,Cosmetics,Online,M,2010-02-06,382392299,7234,437.2 44 | The Gambia,Cereal,Offline,H,2012-06-07,994022214,2117,205.7 45 | Slovakia,Vegetables,Online,H,2012-10-06,759224212,171,154.06 46 | Myanmar,Clothes,Online,H,2015-11-14,223359620,5930,109.28 47 | Comoros,Cereal,Offline,H,2016-03-29,902102267,962,205.7 48 | Iceland,Cosmetics,Online,C,2016-12-31,331438481,8867,437.2 49 | Switzerland,Personal Care,Online,M,2010-12-23,617667090,273,81.73 50 | Macedonia,Clothes,Offline,C,2014-10-14,787399423,7842,109.28 51 | Mauritania,Office Supplies,Offline,C,2012-01-11,837559306,1266,651.21 52 | Albania,Clothes,Online,C,2010-02-02,385383069,2269,109.28 53 | Lesotho,Fruits,Online,L,2013-08-18,918419539,9606,9.33 54 | Saudi Arabia,Cereal,Online,M,2013-03-25,844530045,4063,205.7 55 | Sierra Leone,Office Supplies,Offline,M,2011-11-26,441888415,3457,651.21 56 | Sao Tome and Principe,Fruits,Offline,H,2013-09-17,508980977,7637,9.33 57 | Cote d'Ivoire,Clothes,Online,C,2012-06-08,114606559,3482,109.28 58 | Fiji,Clothes,Offline,C,2010-06-30,647876489,9905,109.28 59 | Austria,Cosmetics,Offline,H,2015-02-23,868214595,2847,437.2 60 | United Kingdom,Household,Online,L,2012-01-05,955357205,282,668.27 61 | Djibouti,Cosmetics,Offline,H,2014-04-07,259353148,7215,437.2 62 | Australia,Cereal,Offline,H,2013-06-09,450563752,682,205.7 63 | San Marino,Baby Food,Online,L,2013-06-26,569662845,4750,255.28 64 | Cameroon,Office Supplies,Online,M,2011-11-07,177636754,5518,651.21 65 | Libya,Clothes,Offline,H,2010-10-30,705784308,6116,109.28 66 | Haiti,Cosmetics,Offline,H,2013-10-13,505716836,1705,437.2 67 | Rwanda,Cosmetics,Offline,H,2013-10-11,699358165,4477,437.2 68 | Gabon,Personal Care,Offline,L,2012-07-08,228944623,8656,81.73 69 | Belize,Clothes,Offline,M,2016-07-25,807025039,5498,109.28 70 | Lithuania,Office Supplies,Offline,H,2010-10-24,166460740,8287,651.21 71 | Madagascar,Clothes,Offline,L,2015-04-25,610425555,7342,109.28 72 | 
Turkmenistan,Office Supplies,Online,M,2013-04-23,462405812,5010,651.21 73 | Libya,Fruits,Online,L,2015-08-14,816200339,673,9.33 74 | Democratic Republic of the Congo,Beverages,Online,C,2011-05-26,585920464,5741,47.45 75 | Djibouti,Cereal,Online,H,2017-05-20,555990016,8656,205.7 76 | Pakistan,Cosmetics,Offline,L,2013-07-05,231145322,9892,437.2 77 | Mexico,Household,Offline,C,2014-11-06,986435210,6954,668.27 78 | Federated States of Micronesia,Beverages,Online,C,2014-10-28,217221009,9379,47.45 79 | Laos,Vegetables,Offline,C,2011-09-15,789176547,3732,154.06 80 | Monaco,Baby Food,Offline,H,2012-05-29,688288152,8614,255.28 81 | Samoa ,Cosmetics,Online,H,2013-07-20,670854651,9654,437.2 82 | Spain,Household,Offline,L,2012-10-21,213487374,4513,668.27 83 | Lebanon,Clothes,Online,L,2012-09-18,663110148,7884,109.28 84 | Iran,Cosmetics,Online,H,2016-11-15,286959302,6489,437.2 85 | Zambia,Snacks,Online,L,2011-01-04,122583663,4085,152.58 86 | Kenya,Vegetables,Online,L,2012-03-18,827844560,6457,154.06 87 | Mexico,Personal Care,Offline,L,2012-02-17,430915820,6422,81.73 88 | Sao Tome and Principe,Beverages,Offline,C,2011-01-16,180283772,8829,47.45 89 | The Gambia,Baby Food,Offline,M,2014-02-03,494747245,5559,255.28 90 | Kuwait,Fruits,Online,M,2012-04-30,513417565,522,9.33 91 | Slovenia,Beverages,Offline,C,2016-10-23,345718562,4660,47.45 92 | Sierra Leone,Office Supplies,Offline,H,2016-12-06,621386563,948,651.21 93 | Australia,Beverages,Offline,H,2014-07-07,240470397,9389,47.45 94 | Azerbaijan,Office Supplies,Online,M,2012-06-13,423331391,2021,651.21 95 | Romania,Cosmetics,Online,H,2010-11-26,660643374,7910,437.2 96 | Nicaragua,Beverages,Offline,C,2011-02-08,963392674,8156,47.45 97 | Mali,Clothes,Online,M,2011-07-26,512878119,888,109.28 98 | Malaysia,Fruits,Offline,L,2011-11-11,810711038,6267,9.33 99 | Sierra Leone,Vegetables,Offline,C,2016-06-01,728815257,1485,154.06 100 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767,81.73 101 | 
Mozambique,Household,Offline,L,2012-02-10,665095412,5367,668.27 102 | Indonesia,Fruits,Online,H,2012-08-22,669165412,97,9925 103 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767,81.73 104 | -------------------------------------------------------------------------------- /IntroSQL/README.md: -------------------------------------------------------------------------------- 1 | # Hands ON materi Workshop Intro SQL 2 | This Repository contain source code for event Workshop Intro SQL using PostgreSQL 3 | 4 | ## Prerequisite 5 | - PostgreSQL version 14 or above with include pgadmin. [Download](https://www.enterprisedb.com/downloads/postgres-postgresql-downloads) 6 | - DBeaver. [Download](https://dbeaver.io/download/) 7 | - VSCode (optional). [Download](https://code.visualstudio.com/download) 8 | 9 |
10 | 11 | ## 1) Create New Database Connection 12 | - Open DBeaver 13 | - Click New Database Connection 14 | - Choose PostgreSQL then click Next 15 | - Input Host: localhost, Database: postgres, Username: postgres, Password: your DB password, then click Finish 16 | ![postgresql connection info](./images/postgreSQL_connection_info.png) 17 | 18 |
19 | 20 | ## 2) Create New Database 21 | - Right-click the `postgres` database, then click SQL Editor --> Open SQL script 22 | ![sql editor](./images/sql_editor.png) 23 | - Type the query below, then click Execute SQL Query 24 | ``` 25 | CREATE DATABASE demo_intro_sql; 26 | ``` 27 |
28 | 29 | ## 3) Hands On DDL 30 | ### 3.1 CREATE Table 31 | - Type the query below in the SQL Editor to create a new table 32 | ``` 33 | CREATE TABLE book( 34 | book_id SERIAL PRIMARY KEY, 35 | book_name VARCHAR(50), 36 | book_category VARCHAR(15), 37 | qty INT, 38 | unit_price REAL 39 | ); 40 | ``` 41 |
42 | 43 | ### 3.2 Alter Table 44 | #### 3.2.1 ALTER Table Add Column 45 | - Type the query below in the SQL Editor 46 | ``` 47 | ALTER TABLE book 48 | ADD COLUMN created_dt DATE, 49 | ADD COLUMN changed_dt DATE, 50 | ADD COLUMN to_be_deleted INT; 51 | ``` 52 | #### 3.2.2 ALTER Table Drop Column 53 | - Type the query below in the SQL Editor 54 | ~~~ 55 | ALTER TABLE book 56 | DROP COLUMN created_dt, 57 | DROP COLUMN changed_dt; 58 | ~~~ 59 | #### 3.2.3 ALTER Table Rename Column 60 | - Type the query below in the SQL Editor 61 | ~~~ 62 | ALTER TABLE book 63 | RENAME COLUMN qty TO unit_qty; 64 | ~~~ 65 | #### 3.2.4 ALTER Table Modify Datatype 66 | - Type the query below in the SQL Editor 67 | ``` 68 | ALTER TABLE book 69 | ALTER COLUMN to_be_deleted TYPE VARCHAR; 70 | ``` 71 |
72 | 73 | ### 3.3 TRUNCATE Table 74 | - Type the query below in the SQL Editor 75 | ``` 76 | TRUNCATE TABLE book; 77 | ``` 78 |
79 | 80 | ## 4) Hands On DML 81 | ### 4.1 INSERT Data 82 | - Download Example Database northwind [here](https://github.com/pthom/northwind_psql/blob/master/northwind.sql). 83 | - Open northwind.sql in DBeaver, then run all queries 84 | - Type the query below in the SQL Editor 85 | ``` 86 | INSERT INTO categories VALUES (9, 'Fruits', 'Fruits like Banana, mango, Apple etc', '\x'); 87 | ``` 88 |
89 | 90 | ### 4.2 UPDATE Data 91 | - Retrieve all data in the products table using the query below. 92 | ``` 93 | SELECT * FROM products; 94 | ``` 95 | - Several products have a stock of 0, so we need to update them to a non-zero value. In this workshop, we set the stock to 20. Type the UPDATE query below: 96 | ``` 97 | UPDATE products 98 | SET units_in_stock = 20 99 | WHERE units_in_stock = 0 100 | RETURNING *; 101 | ``` 102 | ![update_result](./images/update_returning.png) 103 |
104 | 105 | ### 4.3 DELETE Data 106 | - Type the query below in the SQL Editor 107 | ``` 108 | DELETE FROM categories 109 | WHERE category_name = 'Fruits' 110 | RETURNING *; 111 | ``` 112 | 113 |
114 | 115 | ## 5) Hands On DQL 116 | ### 5.1 Basic Select 117 | ``` 118 | SELECT * FROM products; 119 | ``` 120 | ### 5.2 Select with Where clause 121 | ``` 122 | SELECT * FROM products 123 | WHERE unit_price > 10 124 | ``` 125 | ### 5.3 Select with sorting column 126 | ``` 127 | SELECT * FROM products 128 | WHERE unit_price > 10 129 | ORDER BY units_in_stock DESC 130 | ``` 131 | ### 5.4 Limit Retrieve Data 132 | - Retrieve Top 10 Data 133 | ``` 134 | SELECT * FROM products 135 | WHERE unit_price > 10 136 | ORDER BY units_in_stock DESC 137 | LIMIT 10 138 | ``` 139 | - Retrieve the top 10 rows after skipping the first 2 records 140 | ``` 141 | SELECT * FROM products 142 | WHERE unit_price > 10 143 | ORDER BY units_in_stock DESC 144 | LIMIT 10 OFFSET 2 145 | ``` 146 | ### 5.5 Display Unique Data 147 | ``` 148 | SELECT DISTINCT category_id 149 | FROM products 150 | ``` 151 | ### 5.6 Aggregate Function 152 | - Display maximum value of unit_price in products table 153 | ``` 154 | SELECT max(unit_price) as max_unit_price FROM products 155 | ``` 156 | - Display minimum value of unit_price in products table 157 | ``` 158 | SELECT min(unit_price) as min_unit_price FROM products 159 | ``` 160 | 161 | - Display total data in products table 162 | ``` 163 | SELECT count(*) as total_data FROM products 164 | ``` 165 | 166 | - Display total of unit_price in products table 167 | ``` 168 | SELECT sum(unit_price) as sum_unit_price FROM products 169 | ``` 170 | 171 | - Display average of unit_price in products table 172 | ``` 173 | SELECT avg(unit_price) as average_unit_price FROM products 174 | ``` 175 | 176 | ### 5.7 Grouping Data 177 | - Display Top 10 product name and maximum unit price with grouping by product name, sorting with highest maximum unit price 178 | ``` 179 | SELECT product_name, max(unit_price) as max_unit_price 180 | FROM products 181 | GROUP BY product_name 182 | ORDER BY max_unit_price DESC 183 | LIMIT 10 184 | ``` 185 | 186 | - Display Top 10 product name and maximum unit
price with grouping by product name, sorting with highest maximum unit price and also maximum unit price more than 50 187 | ``` 188 | SELECT product_name, max(unit_price) as max_unit_price 189 | FROM products 190 | GROUP BY product_name 191 | HAVING max(unit_price) > 50 192 | ORDER BY max_unit_price DESC 193 | LIMIT 10 194 | ``` 195 | 196 | ### 5.8 JOIN 197 | - Display territory and region using JOIN 198 | ``` 199 | SELECT t.territory_description, r.region_description 200 | FROM territories t, region r 201 | WHERE t.region_id = r.region_id 202 | ``` 203 | ``` 204 | SELECT t.territory_description, r.region_description 205 | FROM territories t INNER JOIN region r 206 | ON t.region_id = r.region_id 207 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using INNER JOIN 208 | ``` 209 | SELECT 210 | p.product_name, 211 | o.order_date, 212 | od.unit_price, 213 | od.quantity, 214 | (od.unit_price * od.quantity) total_price 215 | FROM orders o 216 | INNER JOIN order_details od 217 | ON o.order_id = od.order_id 218 | INNER JOIN products p 219 | ON od.product_id = p.product_id 220 | GROUP BY 221 | p.product_name, 222 | o.order_date, 223 | od.unit_price, 224 | od.quantity 225 | ORDER BY total_price desc 226 | LIMIT 10 227 | ``` 228 | ![inner join query result](./images/inner_join_result.png) 229 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using LEFT JOIN 230 | - Add 1 record in orders table only 231 | ``` 232 | INSERT INTO orders 233 | VALUES (11078, 'SIMOB', 7, '1998-05-06', '1998-06-03', NULL, 2, 18.4400005, 'Simons bistro', 'Vinbæltet 34', 'Kobenhavn', NULL, '1734', 'Denmark'); 234 | ``` 235 | - Run query bellow 236 | ``` 237 | SELECT 238 | p.product_name, 239 | o.order_date, 240 | od.unit_price, 241 | od.quantity, 242 | (od.unit_price * od.quantity) total_price 243 | FROM orders o 244 | LEFT JOIN order_details od 245 | ON o.order_id = od.order_id 
246 | LEFT JOIN products p 247 | ON od.product_id = p.product_id 248 | GROUP BY 249 | p.product_name, 250 | o.order_date, 251 | od.unit_price, 252 | od.quantity 253 | ORDER BY total_price DESC 254 | LIMIT 10 255 | ``` 256 | ![left join query result](./images/left_join_result.png) 257 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using RIGHT JOIN 258 | - Add 1 record in products table only 259 | ``` 260 | INSERT INTO products 261 | VALUES (78, 'Original Computer', 13, 2, '12 boxes', 13, 32, 0, 15, 0); 262 | ``` 263 | - Run query bellow 264 | ``` 265 | SELECT 266 | p.product_name, 267 | o.order_date, 268 | od.unit_price, 269 | od.quantity, 270 | (od.unit_price * od.quantity) total_price 271 | FROM orders o 272 | RIGHT JOIN order_details od 273 | ON o.order_id = od.order_id 274 | RIGHT JOIN products p 275 | ON od.product_id = p.product_id 276 | GROUP BY 277 | p.product_name, 278 | o.order_date, 279 | od.unit_price, 280 | od.quantity 281 | ORDER BY total_price DESC 282 | LIMIT 10 283 | ``` 284 | ![right join query result](./images/right_join_result.png) 285 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using FULL JOIN 286 | ``` 287 | SELECT 288 | p.product_name, 289 | o.order_date, 290 | od.unit_price, 291 | od.quantity, 292 | (od.unit_price * od.quantity) total_price 293 | FROM orders o 294 | FULL JOIN order_details od 295 | ON o.order_id = od.order_id 296 | FULL JOIN products p 297 | ON od.product_id = p.product_id 298 | GROUP BY 299 | p.product_name, 300 | o.order_date, 301 | od.unit_price, 302 | od.quantity 303 | ORDER BY total_price DESC 304 | LIMIT 10 305 | ``` 306 | ![full join query result](./images/full_join_result.png) 307 | 308 | 309 | 310 | ## 5) Challenge Question 311 | Please click this [link](./challenges.md) 312 | 313 | -------------------------------------------------------------------------------- /Python 
Programming/python-basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "### Docs : https://docs.google.com/presentation/d/1LAtAlk2TzYmGGZjE7nSI0899SNKihO4Ry-o0ir19oGs/preview" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "source": [ 13 | "## Variables" 14 | ], 15 | "metadata": {} 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "source": [ 21 | "nama = \"Ardhi\"\n", 22 | "jenis_kelamin = \"Pria\"\n", 23 | "umur = 35" 24 | ], 25 | "outputs": [], 26 | "metadata": {} 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "source": [ 31 | "## Data Types - String" 32 | ], 33 | "metadata": {} 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "source": [ 39 | "nama_barang = 'Laptop'\n", 40 | "tipe_barang = \"A51K\"\n", 41 | "alamat_pengiriman = \"Jl. Maju Mundur Ditempat No. 31, Jakarta\"" 42 | ], 43 | "outputs": [], 44 | "metadata": {} 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "source": [ 49 | "## Data Types - Integer" 50 | ], 51 | "metadata": {} 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "source": [ 57 | "stock_barang = 20\n", 58 | "barang_terjual = 950\n", 59 | "jumlah_anak = 3" 60 | ], 61 | "outputs": [], 62 | "metadata": {} 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "source": [ 67 | "## Data Types - Float" 68 | ], 69 | "metadata": {} 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "source": [ 75 | "berat_badan = 35.7\n", 76 | "tinggi_badan = 170.5\n", 77 | "diskon = 0.5" 78 | ], 79 | "outputs": [], 80 | "metadata": {} 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "source": [ 85 | "## Data Types - Boolean" 86 | ], 87 | "metadata": {} 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "source": [ 93 | "user_active = True\n", 94 | "married_status = False\n", 95 | "ready_to_ship = True" 96 
| ], 97 | "outputs": [], 98 | "metadata": {} 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "source": [ 103 | "## Data Types - List" 104 | ], 105 | "metadata": {} 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "source": [ 111 | "warna = [\"merah\", \"biru\", \"kuning\"]\n", 112 | "weight_history = [25.9, 28.1, 30.0, 32.4]\n", 113 | "random = [1, \"aku\", 34.8, '$', False]\n", 114 | "\n", 115 | "# get spesific item\n", 116 | "print(warna[1])" 117 | ], 118 | "outputs": [ 119 | { 120 | "output_type": "stream", 121 | "name": "stdout", 122 | "text": [ 123 | "biru\n" 124 | ] 125 | } 126 | ], 127 | "metadata": {} 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "source": [ 132 | "## Data Types - Dictionary" 133 | ], 134 | "metadata": {} 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "source": [ 140 | "identitas = {\"nama\": \"Ardhi\", \"umur\": 31, \"jenis_kelamin\": \"Pria\"}\n", 141 | "product_detail = {\"nama\": \"Asis ROG\", \"type\": \"A51K\", \"diskon\": 0.2, \"harga\": 250000.0}\n", 142 | "mawar = {\"tersedia\": True, \"warna\": [\"merah\", \"putih\"] }\n", 143 | "\n", 144 | "# get spesific item\n", 145 | "identitas[\"umur\"]" 146 | ], 147 | "outputs": [ 148 | { 149 | "output_type": "execute_result", 150 | "data": { 151 | "text/plain": [ 152 | "31" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "execution_count": 7 157 | } 158 | ], 159 | "metadata": {} 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "source": [ 164 | "## Data Types - Tuple" 165 | ], 166 | "metadata": {} 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 8, 171 | "source": [ 172 | "jenis_lagu = (\"pop\", \"rock\", \"dangdut\")\n", 173 | "penyanyi = (\"Awin Fals\", 33, \"Album Kenangan\")\n", 174 | "random = (22.3, \"gunting\", True, 0)\n", 175 | "\n", 176 | "# print(random)\n", 177 | "random[2] = \"ucup\"\n", 178 | "print(random)\n", 179 | "\n", 180 | "# get spesific item\n", 181 | "# jenis_lagu[2]" 182 | 
], 183 | "outputs": [ 184 | { 185 | "output_type": "error", 186 | "ename": "TypeError", 187 | "evalue": "'tuple' object does not support item assignment", 188 | "traceback": [ 189 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 190 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 191 | "Input \u001b[0;32mIn [8]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m random \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m22.3\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgunting\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# print(random)\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m random[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mucup\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(random)\n", 192 | "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment" 193 | ] 194 | } 195 | ], 196 | "metadata": {} 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "source": [ 201 | "## Data Types - None" 202 | ], 203 | "metadata": {} 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "source": [ 209 | "resep_obat = None\n", 210 | "mata_kuliah = None\n", 211 | "status_karyawan = None" 212 | ], 213 | "outputs": [ 214 | { 215 | "output_type": "error", 216 | "ename": "SyntaxError", 217 | "evalue": "invalid syntax (318281694.py, line 1)", 218 | "traceback": [ 219 | "\u001b[0;36m Input \u001b[0;32mIn [45]\u001b[0;36m\u001b[0m\n\u001b[0;31m resep_obat =\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" 220 | ] 221 | } 222 | ], 223 | "metadata": {} 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "source": [ 228 | "## Conditionals - If statement" 229 | ], 230 | 
"metadata": {} 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "source": [ 236 | "nilai = 1\n", 237 | "if nilai == 1:\n", 238 | " # kode dibawah ini akan dieksekusi jika nilainya adalah 1\n", 239 | " print(\"ini adalah angka satu\") \n", 240 | "else:\n", 241 | " # kode dibawah ini akan dieksekusi jika nilainya bukan 1 \n", 242 | " print(\"ini bukanlah angka 1\")" 243 | ], 244 | "outputs": [ 245 | { 246 | "output_type": "error", 247 | "ename": "IndentationError", 248 | "evalue": "expected an indented block (3315241015.py, line 4)", 249 | "traceback": [ 250 | "\u001b[0;36m Input \u001b[0;32mIn [30]\u001b[0;36m\u001b[0m\n\u001b[0;31m print(\"ini adalah angka satu\")\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n" 251 | ] 252 | } 253 | ], 254 | "metadata": {} 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "source": [ 259 | "## Indentation" 260 | ], 261 | "metadata": {} 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "source": [ 267 | "nama = \"Ardhi\"\n", 268 | "if nama == \"Ardhi\":\n", 269 | " print(\"Halo\")\n", 270 | " print(\"Nama saya \" + nama)" 271 | ], 272 | "outputs": [], 273 | "metadata": {} 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "source": [ 278 | "## Comment" 279 | ], 280 | "metadata": {} 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "source": [ 286 | "# kode dibawah ini adalah untuk menghitung 10 x 2\n", 287 | "perkalian = 10 * 2\n", 288 | "print(perkalian)\n", 289 | "\n", 290 | "'''\n", 291 | "Komentar ini dapat digunakan untuk multi baris\n", 292 | "sehingga memudahkan untuk memberikan komentar \n", 293 | "yang panjang\n", 294 | "'''\n", 295 | "\n", 296 | "# print(\"Halo\")\n", 297 | "print(\"Nama saya Ardhi\")" 298 | ], 299 | "outputs": [ 300 | { 301 | "output_type": "stream", 302 | "name": "stdout", 303 | "text": [ 304 | "20\n", 305 | "Nama saya Ardhi\n" 306 
| ] 307 | } 308 | ], 309 | "metadata": {} 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "source": [ 314 | "## Built-in Funcions" 315 | ], 316 | "metadata": {} 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "source": [ 322 | "# len : untuk menghitung jumlah panjangnya suatu value\n", 323 | "warna = [\"merah\", \"biru\", \"kuning\"]\n", 324 | "len(warna)\n", 325 | "\n", 326 | "# min : untuk mengambil nilai terkecil\n", 327 | "nilai_rapor = [22.0, 32.4, 37.3]\n", 328 | "min(nilai_rapor)\n", 329 | "\n", 330 | "# print : untuk menampilkan value \n", 331 | "print(\"Halo, saya Ardhi\")\n", 332 | "\n", 333 | "\n" 334 | ], 335 | "outputs": [ 336 | { 337 | "output_type": "stream", 338 | "name": "stdout", 339 | "text": [ 340 | "Halo, saya Ardhi\n" 341 | ] 342 | } 343 | ], 344 | "metadata": {} 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "source": [ 349 | "## Type Cast" 350 | ], 351 | "metadata": {} 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "source": [ 357 | "# integer\n", 358 | "harga = 1000\n", 359 | "\n", 360 | "# ubah menjadi float\n", 361 | "harga_float = float(harga)\n", 362 | "print(harga_float)\n", 363 | "print(type(harga_float))\n", 364 | "\n", 365 | "# ubah menjadi string\n", 366 | "harga_str = str(harga)\n", 367 | "print(harga_str)\n", 368 | "print(type(harga_str))" 369 | ], 370 | "outputs": [ 371 | { 372 | "output_type": "stream", 373 | "name": "stdout", 374 | "text": [ 375 | "1000.0\n", 376 | "\n", 377 | "1000\n", 378 | "\n" 379 | ] 380 | } 381 | ], 382 | "metadata": {} 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "source": [ 387 | "## Exception" 388 | ], 389 | "metadata": {} 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "source": [ 395 | "nilai = \"20.4\"\n", 396 | "try:\n", 397 | " hitung = 10 + nilai\n", 398 | " print(hitung)\n", 399 | "except Exception as e:\n", 400 | " print(e)" 401 | ], 402 | "outputs": [ 403 | { 404 | 
"output_type": "stream", 405 | "name": "stdout", 406 | "text": [ 407 | "unsupported operand type(s) for +: 'int' and 'str'\n" 408 | ] 409 | } 410 | ], 411 | "metadata": {} 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "source": [ 416 | "## Function" 417 | ], 418 | "metadata": {} 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "source": [ 424 | "def tampilkan_teks(input_text):\n", 425 | " '''\n", 426 | " input : input text (str)\n", 427 | " output: print()\n", 428 | " '''\n", 429 | " print(input_text)\n", 430 | "\n", 431 | "\n", 432 | "input_text = \"Halo, nama saya Ardhi\"\n", 433 | "tampilkan_teks(input_text)\n", 434 | "\n", 435 | "say_hi = \"Halo, nama saya udin\"\n", 436 | "tampilkan_teks(say_hi)\n" 437 | ], 438 | "outputs": [ 439 | { 440 | "output_type": "stream", 441 | "name": "stdout", 442 | "text": [ 443 | "Halo, nama saya Ardhi\n", 444 | "Halo, nama saya udin\n" 445 | ] 446 | } 447 | ], 448 | "metadata": {} 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "source": [ 453 | "## Import Packages" 454 | ], 455 | "metadata": {} 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "source": [ 461 | "import wget\n", 462 | "\n", 463 | "url = 'https://sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv'\n", 464 | "download = wget.download(url)" 465 | ], 466 | "outputs": [], 467 | "metadata": {} 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "source": [ 472 | "## Loop" 473 | ], 474 | "metadata": {} 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "source": [ 480 | "# While loop akan dilakukan selama kondisinya memenuhi (True)\n", 481 | "count = 1\n", 482 | "while (count < 4):\n", 483 | " print (\"The count is: \", count)\n", 484 | " count = count + 1\n", 485 | "\n", 486 | "print (\"selesai!\")" 487 | ], 488 | "outputs": [ 489 | { 490 | "output_type": "stream", 491 | "name": "stdout", 492 | "text": [ 493 | "The count is: 1\n", 494 | "The count is: 2\n", 
495 | "The count is: 3\n", 496 | "selesai!\n" 497 | ] 498 | } 499 | ], 500 | "metadata": {} 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "source": [ 506 | "# for loop digunakan untuk mengulang sebuah item / urutan apapun\n", 507 | "angka = [1,2,3,4,5]\n", 508 | "for x in angka:\n", 509 | " print(x)" 510 | ], 511 | "outputs": [ 512 | { 513 | "output_type": "stream", 514 | "name": "stdout", 515 | "text": [ 516 | "1\n", 517 | "2\n", 518 | "3\n", 519 | "4\n", 520 | "5\n" 521 | ] 522 | } 523 | ], 524 | "metadata": {} 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "source": [ 530 | "nama_barang = \"sepatu\"\n", 531 | "\n", 532 | "NAMA_barang = \"mobil\"\n", 533 | "\n", 534 | "print(nama_barang)" 535 | ], 536 | "outputs": [ 537 | { 538 | "output_type": "stream", 539 | "name": "stdout", 540 | "text": [ 541 | "sepatu\n" 542 | ] 543 | } 544 | ], 545 | "metadata": {} 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": null, 550 | "source": [ 551 | "nilai = 10\n", 552 | "nilai_tambahan = 10\n", 553 | "\n", 554 | "hasil = nilai + nilai_tambahan\n", 555 | "print(hasil)" 556 | ], 557 | "outputs": [ 558 | { 559 | "output_type": "stream", 560 | "name": "stdout", 561 | "text": [ 562 | "20\n" 563 | ] 564 | } 565 | ], 566 | "metadata": {} 567 | } 568 | ], 569 | "metadata": { 570 | "orig_nbformat": 4, 571 | "language_info": { 572 | "name": "python", 573 | "version": "3.8.10", 574 | "mimetype": "text/x-python", 575 | "codemirror_mode": { 576 | "name": "ipython", 577 | "version": 3 578 | }, 579 | "pygments_lexer": "ipython3", 580 | "nbconvert_exporter": "python", 581 | "file_extension": ".py" 582 | }, 583 | "kernelspec": { 584 | "name": "python3", 585 | "display_name": "Python 3.8.10 64-bit" 586 | }, 587 | "interpreter": { 588 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 2 593 | } 
-------------------------------------------------------------------------------- /Python Programming/data-pipeline-breakdown.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "## Example Data Engineer Task\n", 7 | "### - implementasi ETL dalam kode python\n", 8 | "### - Extract CSV File -> Transformasi (Data Cleaning) -> Load ke folder data-warehouse" 9 | ], 10 | "metadata": {} 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "source": [ 16 | "# import library\n", 17 | "import pandas as pd" 18 | ], 19 | "outputs": [], 20 | "metadata": {} 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "source": [ 26 | "# read data\n", 27 | "df = pd.read_csv(\"sources/sales_data.csv\")" 28 | ], 29 | "outputs": [], 30 | "metadata": {} 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "source": [ 36 | "# show top 5 data\n", 37 | "df.head()" 38 | ], 39 | "outputs": [ 40 | { 41 | "output_type": "execute_result", 42 | "data": { 43 | "text/plain": [ 44 | " country type channel priority date \\\n", 45 | "0 Tuvalu Baby Food Offline H 2010-05-28 \n", 46 | "1 Grenada Cereal Online C 2012-08-22 \n", 47 | "2 Russia NaN Offline L 2014-05-02 \n", 48 | "3 Sao Tome and Principe Fruits Online C 2014-06-20 \n", 49 | "4 Rwanda Office Supplies Offline L NaN \n", 50 | "\n", 51 | " id units_sold unit_price \n", 52 | "0 669165933 9925.0 255.28 \n", 53 | "1 963881480 NaN 205.70 \n", 54 | "2 341417157 1779.0 651.21 \n", 55 | "3 514321792 8102.0 9.33 \n", 56 | "4 115456712 5062.0 651.21 " 57 | ], 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
countrytypechannelprioritydateidunits_soldunit_price
0TuvaluBaby FoodOfflineH2010-05-286691659339925.0255.28
1GrenadaCerealOnlineC2012-08-22963881480NaN205.70
2RussiaNaNOfflineL2014-05-023414171571779.0651.21
3Sao Tome and PrincipeFruitsOnlineC2014-06-205143217928102.09.33
4RwandaOffice SuppliesOfflineLNaN1154567125062.0651.21
\n", 145 | "
" 146 | ] 147 | }, 148 | "metadata": {}, 149 | "execution_count": 3 150 | } 151 | ], 152 | "metadata": {} 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 4, 157 | "source": [ 158 | "# filter by type\n", 159 | "df[df[\"type\"]==\"Personal Care\"]" 160 | ], 161 | "outputs": [ 162 | { 163 | "output_type": "execute_result", 164 | "data": { 165 | "text/plain": [ 166 | " country type channel priority date \\\n", 167 | "8 Republic of the Congo Personal Care Offline M 2015-07-14 \n", 168 | "14 Mongolia Personal Care Offline C 2014-02-19 \n", 169 | "24 Moldova Personal Care Online L 2016-05-07 \n", 170 | "31 South Sudan Personal Care Offline C 2013-12-29 \n", 171 | "35 Costa Rica Personal Care Offline L 2017-05-08 \n", 172 | "40 Niger Personal Care Online H 2017-03-11 \n", 173 | "47 Switzerland Personal Care Online M 2010-12-23 \n", 174 | "66 Gabon Personal Care Offline L 2012-07-08 \n", 175 | "85 Mexico Personal Care Offline L 2012-02-17 \n", 176 | "98 Mexico Personal Care Offline M 2015-07-30 \n", 177 | "101 Mexico Personal Care Offline M 2015-07-30 \n", 178 | "\n", 179 | " id units_sold unit_price \n", 180 | "8 770463311 6070.0 81.73 \n", 181 | "14 832401311 4901.0 81.73 \n", 182 | "24 740147912 5070.0 81.73 \n", 183 | "31 406502997 2125.0 81.73 \n", 184 | "35 456767165 6409.0 81.73 \n", 185 | "40 699285638 3015.0 81.73 \n", 186 | "47 617667090 273.0 81.73 \n", 187 | "66 228944623 8656.0 81.73 \n", 188 | "85 430915820 6422.0 81.73 \n", 189 | "98 559427106 5767.0 81.73 \n", 190 | "101 559427106 5767.0 81.73 " 191 | ], 192 | "text/html": [ 193 | "
\n", 194 | "\n", 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | "
countrytypechannelprioritydateidunits_soldunit_price
8Republic of the CongoPersonal CareOfflineM2015-07-147704633116070.081.73
14MongoliaPersonal CareOfflineC2014-02-198324013114901.081.73
24MoldovaPersonal CareOnlineL2016-05-077401479125070.081.73
31South SudanPersonal CareOfflineC2013-12-294065029972125.081.73
35Costa RicaPersonal CareOfflineL2017-05-084567671656409.081.73
40NigerPersonal CareOnlineH2017-03-116992856383015.081.73
47SwitzerlandPersonal CareOnlineM2010-12-23617667090273.081.73
66GabonPersonal CareOfflineL2012-07-082289446238656.081.73
85MexicoPersonal CareOfflineL2012-02-174309158206422.081.73
98MexicoPersonal CareOfflineM2015-07-305594271065767.081.73
101MexicoPersonal CareOfflineM2015-07-305594271065767.081.73
\n", 345 | "
" 346 | ] 347 | }, 348 | "metadata": {}, 349 | "execution_count": 4 350 | } 351 | ], 352 | "metadata": {} 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 5, 357 | "source": [ 358 | "# show missing value\n", 359 | "df[df.isna().any(axis=1)]" 360 | ], 361 | "outputs": [ 362 | { 363 | "output_type": "execute_result", 364 | "data": { 365 | "text/plain": [ 366 | " country type channel priority date id \\\n", 367 | "1 Grenada Cereal Online C 2012-08-22 963881480 \n", 368 | "2 Russia NaN Offline L 2014-05-02 341417157 \n", 369 | "4 Rwanda Office Supplies Offline L NaN 115456712 \n", 370 | "5 NaN Baby Food Online C 2015-02-04 547995746 \n", 371 | "\n", 372 | " units_sold unit_price \n", 373 | "1 NaN 205.70 \n", 374 | "2 1779.0 651.21 \n", 375 | "4 5062.0 651.21 \n", 376 | "5 2974.0 255.28 " 377 | ], 378 | "text/html": [ 379 | "
\n", 380 | "\n", 393 | "\n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
countrytypechannelprioritydateidunits_soldunit_price
1GrenadaCerealOnlineC2012-08-22963881480NaN205.70
2RussiaNaNOfflineL2014-05-023414171571779.0651.21
4RwandaOffice SuppliesOfflineLNaN1154567125062.0651.21
5NaNBaby FoodOnlineC2015-02-045479957462974.0255.28
\n", 454 | "
" 455 | ] 456 | }, 457 | "metadata": {}, 458 | "execution_count": 5 459 | } 460 | ], 461 | "metadata": {} 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 6, 466 | "source": [ 467 | "# drop missing value\n", 468 | "df = df.dropna()\n", 469 | "\n", 470 | "# show missing value after drop \n", 471 | "df[df.isna().any(axis=1)]" 472 | ], 473 | "outputs": [ 474 | { 475 | "output_type": "execute_result", 476 | "data": { 477 | "text/plain": [ 478 | "Empty DataFrame\n", 479 | "Columns: [country, type, channel, priority, date, id, units_sold, unit_price]\n", 480 | "Index: []" 481 | ], 482 | "text/html": [ 483 | "
\n", 484 | "\n", 497 | "\n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | "
countrytypechannelprioritydateidunits_soldunit_price
\n", 514 | "
" 515 | ] 516 | }, 517 | "metadata": {}, 518 | "execution_count": 6 519 | } 520 | ], 521 | "metadata": {} 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 7, 526 | "source": [ 527 | "# check unique column type\n", 528 | "df[\"type\"].unique()" 529 | ], 530 | "outputs": [ 531 | { 532 | "output_type": "execute_result", 533 | "data": { 534 | "text/plain": [ 535 | "array(['Baby Food', 'Fruits', 'Household', 'Vegetables', 'Personal Care',\n", 536 | " 'Cereal', 'Clothes', 'Cosmetics', 'Beverages', 'Meat', 'Snacks',\n", 537 | " 'Frutis', 'Office Supplies'], dtype=object)" 538 | ] 539 | }, 540 | "metadata": {}, 541 | "execution_count": 7 542 | } 543 | ], 544 | "metadata": {} 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 8, 549 | "source": [ 550 | "# replace Frutis to Fruits (typo)\n", 551 | "df.loc[df[\"type\"]==\"Frutis\", \"type\"] = \"Fruits\"\n", 552 | "\n", 553 | "# check unique column type\n", 554 | "df[\"type\"].unique()" 555 | ], 556 | "outputs": [ 557 | { 558 | "output_type": "execute_result", 559 | "data": { 560 | "text/plain": [ 561 | "array(['Baby Food', 'Fruits', 'Household', 'Vegetables', 'Personal Care',\n", 562 | " 'Cereal', 'Clothes', 'Cosmetics', 'Beverages', 'Meat', 'Snacks',\n", 563 | " 'Office Supplies'], dtype=object)" 564 | ] 565 | }, 566 | "metadata": {}, 567 | "execution_count": 8 568 | } 569 | ], 570 | "metadata": {} 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 9, 575 | "source": [ 576 | "# duplicate data\n", 577 | "df[df.duplicated()]" 578 | ], 579 | "outputs": [ 580 | { 581 | "output_type": "execute_result", 582 | "data": { 583 | "text/plain": [ 584 | " country type channel priority date id \\\n", 585 | "101 Mexico Personal Care Offline M 2015-07-30 559427106 \n", 586 | "\n", 587 | " units_sold unit_price \n", 588 | "101 5767.0 81.73 " 589 | ], 590 | "text/html": [ 591 | "
\n", 592 | "\n", 605 | "\n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | "
countrytypechannelprioritydateidunits_soldunit_price
101MexicoPersonal CareOfflineM2015-07-305594271065767.081.73
\n", 633 | "
" 634 | ] 635 | }, 636 | "metadata": {}, 637 | "execution_count": 9 638 | } 639 | ], 640 | "metadata": {} 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 10, 645 | "source": [ 646 | "# check record yang duplicate\n", 647 | "df[df[\"id\"]==559427106]" 648 | ], 649 | "outputs": [ 650 | { 651 | "output_type": "execute_result", 652 | "data": { 653 | "text/plain": [ 654 | " country type channel priority date id \\\n", 655 | "98 Mexico Personal Care Offline M 2015-07-30 559427106 \n", 656 | "101 Mexico Personal Care Offline M 2015-07-30 559427106 \n", 657 | "\n", 658 | " units_sold unit_price \n", 659 | "98 5767.0 81.73 \n", 660 | "101 5767.0 81.73 " 661 | ], 662 | "text/html": [ 663 | "
\n", 664 | "\n", 677 | "\n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | "
countrytypechannelprioritydateidunits_soldunit_price
98MexicoPersonal CareOfflineM2015-07-305594271065767.081.73
101MexicoPersonal CareOfflineM2015-07-305594271065767.081.73
\n", 716 | "
" 717 | ] 718 | }, 719 | "metadata": {}, 720 | "execution_count": 10 721 | } 722 | ], 723 | "metadata": {} 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 11, 728 | "source": [ 729 | "# drop duplicate\n", 730 | "df = df.drop_duplicates()\n", 731 | "\n", 732 | "# duplicate data again\n", 733 | "df[df.duplicated()]" 734 | ], 735 | "outputs": [ 736 | { 737 | "output_type": "execute_result", 738 | "data": { 739 | "text/plain": [ 740 | "Empty DataFrame\n", 741 | "Columns: [country, type, channel, priority, date, id, units_sold, unit_price]\n", 742 | "Index: []" 743 | ], 744 | "text/html": [ 745 | "
\n", 746 | "\n", 759 | "\n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | "
countrytypechannelprioritydateidunits_soldunit_price
\n", 776 | "
" 777 | ] 778 | }, 779 | "metadata": {}, 780 | "execution_count": 11 781 | } 782 | ], 783 | "metadata": {} 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": 12, 788 | "source": [ 789 | "# export hasil cleaning\n", 790 | "df.to_csv(\"data-warehouse/sales_data_cleaned.csv\", index=False)" 791 | ], 792 | "outputs": [], 793 | "metadata": {} 794 | } 795 | ], 796 | "metadata": { 797 | "orig_nbformat": 4, 798 | "language_info": { 799 | "name": "python", 800 | "version": "3.8.10", 801 | "mimetype": "text/x-python", 802 | "codemirror_mode": { 803 | "name": "ipython", 804 | "version": 3 805 | }, 806 | "pygments_lexer": "ipython3", 807 | "nbconvert_exporter": "python", 808 | "file_extension": ".py" 809 | }, 810 | "kernelspec": { 811 | "name": "python3", 812 | "display_name": "Python 3.8.10 64-bit" 813 | }, 814 | "interpreter": { 815 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 816 | } 817 | }, 818 | "nbformat": 4, 819 | "nbformat_minor": 2 820 | } --------------------------------------------------------------------------------