├── Shell Scripting
├── readme.md
├── source
│ ├── data-sed.csv
│ ├── code-scribble.txt
│ ├── logfile.log
│ ├── welcome.sh
│ ├── customer.csv
│ ├── if-else.sh
│ ├── if-else-args.sh
│ ├── sales.csv
│ ├── data-source.csv
│ └── data-source.json
├── images
│ ├── 1-vim.png
│ ├── 2-for.png
│ ├── 3-awk-1.png
│ ├── 3-sed-1.png
│ ├── 2-if-else.png
│ ├── 2-variables.png
│ ├── 2-welcome.png
│ ├── 4-data-source.png
│ ├── 2-if-else-args.png
│ ├── 2-welcome-edit.png
│ ├── 1-environment-gitbash.png
│ └── 2-if-else-args-loop.png
├── assignment-solutions.png
├── intro.md
├── prerequisite.md
├── session-1.md
├── session-4.md
├── session-2.md
└── session-3.md
├── Python Programming
├── hello.py
├── Data Engineering Indonesia.png
├── Introduction to Python Programming.pdf
├── data-warehouse
│ └── sales_data_cleaned.csv
├── sources
│ └── sales_data.csv
├── python-basic.ipynb
└── data-pipeline-breakdown.ipynb
└── IntroSQL
├── IntroSQL.pdf
├── images
├── sql_editor.png
├── challenge_output.png
├── full_join_result.png
├── inner_join_result.png
├── left_join_result.png
├── right_join_result.png
├── update_returning.png
└── postgreSQL_connection_info.png
├── challenges.md
└── README.md
/Shell Scripting/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Python Programming/hello.py:
--------------------------------------------------------------------------------
1 | teks = "python sangat mudah dipahami"
2 | print(teks)
--------------------------------------------------------------------------------
/IntroSQL/IntroSQL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/IntroSQL.pdf
--------------------------------------------------------------------------------
/Shell Scripting/source/data-sed.csv:
--------------------------------------------------------------------------------
1 | Nama, Usia, Kota
2 | Yusuf,17,New York
3 | Zalmawati,25,Perth
4 | Reza,28,Bekasi
--------------------------------------------------------------------------------
/IntroSQL/images/sql_editor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/sql_editor.png
--------------------------------------------------------------------------------
/Shell Scripting/images/1-vim.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/1-vim.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-for.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-for.png
--------------------------------------------------------------------------------
/Shell Scripting/images/3-awk-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/3-awk-1.png
--------------------------------------------------------------------------------
/Shell Scripting/images/3-sed-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/3-sed-1.png
--------------------------------------------------------------------------------
/IntroSQL/images/challenge_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/challenge_output.png
--------------------------------------------------------------------------------
/IntroSQL/images/full_join_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/full_join_result.png
--------------------------------------------------------------------------------
/IntroSQL/images/inner_join_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/inner_join_result.png
--------------------------------------------------------------------------------
/IntroSQL/images/left_join_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/left_join_result.png
--------------------------------------------------------------------------------
/IntroSQL/images/right_join_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/right_join_result.png
--------------------------------------------------------------------------------
/IntroSQL/images/update_returning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/update_returning.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-if-else.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-variables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-variables.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-welcome.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-welcome.png
--------------------------------------------------------------------------------
/Shell Scripting/assignment-solutions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/assignment-solutions.png
--------------------------------------------------------------------------------
/Shell Scripting/images/4-data-source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/4-data-source.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-if-else-args.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else-args.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-welcome-edit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-welcome-edit.png
--------------------------------------------------------------------------------
/IntroSQL/images/postgreSQL_connection_info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/IntroSQL/images/postgreSQL_connection_info.png
--------------------------------------------------------------------------------
/Shell Scripting/images/1-environment-gitbash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/1-environment-gitbash.png
--------------------------------------------------------------------------------
/Shell Scripting/images/2-if-else-args-loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Shell Scripting/images/2-if-else-args-loop.png
--------------------------------------------------------------------------------
/Python Programming/Data Engineering Indonesia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Python Programming/Data Engineering Indonesia.png
--------------------------------------------------------------------------------
/Python Programming/Introduction to Python Programming.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-engineers-id/dateng-nongki/HEAD/Python Programming/Introduction to Python Programming.pdf
--------------------------------------------------------------------------------
/Shell Scripting/source/code-scribble.txt:
--------------------------------------------------------------------------------
1 | name="Waskito Pringgohandoko" \
2 | age=40 \
3 | height=180.5 \
4 | fave_fruits=("banana" "apple" "kiwi" "jackfruit") \
5 | this_is_true=1 \
6 | this_is_false=0 \
7 | null_var=
8 |
9 |
--------------------------------------------------------------------------------
/Shell Scripting/source/logfile.log:
--------------------------------------------------------------------------------
1 | 2022-01-01,INFO,User logged in
2 | 2022-01-01,ERROR,Invalid input detected
3 | 2022-01-02,WARNING,Disk space low
4 | 2022-01-02,ERROR,Database connection failed
5 | 2022-01-03,INFO,User logged out
--------------------------------------------------------------------------------
/Shell Scripting/source/welcome.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This is a sample shell script
4 | # It displays a welcome message and the current date
5 |
6 | echo "Welcome to the Shell Scripting Course!"
7 | echo "Today's date is: $(date)"
8 |
--------------------------------------------------------------------------------
/Shell Scripting/source/customer.csv:
--------------------------------------------------------------------------------
1 | customer_code,address,customer_name
2 | CUST001,123 Main Street,John Smith
3 | CUST002,456 Oak Avenue,Jane Doe
4 | CUST003,789 Elm Lane,Michael Johnson
5 | CUST004,321 Pine Road,Sarah Wilson
6 | CUST005,987 Maple Drive,Emily Brown
7 |
--------------------------------------------------------------------------------
/Shell Scripting/source/if-else.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script demonstrates conditional statements
4 |
5 | read -p "Enter a number: " num
6 |
7 | if [ $num -gt 10 ]; then
8 | echo "The number is greater than 10"
9 | else
10 | echo "The number is less than or equal to 10"
11 | fi
12 |
--------------------------------------------------------------------------------
/Shell Scripting/source/if-else-args.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script demonstrates conditional statements
4 |
5 | # read -p "Enter a number: " num
6 |
7 | if [ $1 -gt 10 ]; then
8 | echo "The number is greater than 10"
9 | else
10 | echo "The number is less than or equal to 10"
11 | fi
12 |
--------------------------------------------------------------------------------
/Shell Scripting/source/sales.csv:
--------------------------------------------------------------------------------
1 | date,product,quantity,price,customer_id
2 | 2022-01-01,Shirt,10,25.00,CUST001
3 | 2022-01-01,Pants,5,40.00,CUST002
4 | 2022-01-02,Shoes,2,80.00,CUST003
5 | 2022-01-03,Hat,3,15.00,CUST004
6 | 2022-01-03,Shirt,7,25.00,CUST005
7 | 2022-01-04,Shoes,1,80.00,CUST001
8 | 2022-01-04,Pants,4,40.00,CUST002
9 |
--------------------------------------------------------------------------------
/IntroSQL/challenges.md:
--------------------------------------------------------------------------------
1 | ## Challenges Question
2 | Display the top 10 rows of product, order date, and sum of total price for product categories containing tea and breads, where the total price is more than 1000, sorted by the highest sum of total price.
3 |
4 | The output columns should be: product name, order date, and sum of total price.
5 |
6 | Example output :
7 | 
8 |
--------------------------------------------------------------------------------
/Shell Scripting/source/data-source.csv:
--------------------------------------------------------------------------------
1 | Nama,Umur
2 | Arnetha_Marchelina,25
3 | Sebastian_Cahyo_Ardhi_Iswara,30
4 | Helmi_Aziz_Muhammad,22
5 | Muhammad_Ridwan_Maulana,28
6 | Arif_Setiyawan,24
7 | Rahmatulloh,31
8 | Aidiel_Fitra,23
9 | Yusuf_Hanafi_Angkat,26
10 | Isharridho_Pratama,27
11 | Vincent_Junitio_Ungu,29
12 | Hairulloh_Sukur,33
13 | Yuandika_Alfahreiza,20
14 | Sahala_Josua_Sinaga,34
15 | Septian_Dwi_Kurnia,21
16 | Fitra_Anugrah,32
17 | Muhammad_Ridwan_Maulana,35
--------------------------------------------------------------------------------
/Shell Scripting/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 |
4 | My name is `Waskito Pringgohandoko`, you can call me **Kikit**.
5 | - I was born in the 80’s.
6 | - I started to scratch the surface of data engineering about a year ago.
7 |
8 | I’m here to share, and I hope you guys will enjoy this event by sharing your thoughts, concerns, and questions, and we’ll do our best to address them.
9 |
10 |
11 | # What will we do today?
12 |
13 | ## Have fun!
14 | While doing it, we will have these hands-on activities:
15 | - Create your first shell script and run it
16 | - Make the script do things across multiple use cases
17 | - Enhance the script using variables and simple programming flows
18 | - Automate the script
19 |
20 |
21 | ## Resources
22 |
23 | The scripts are provided in the /source directory
24 |
25 |
--------------------------------------------------------------------------------
/Shell Scripting/prerequisite.md:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 |
3 | ## Device
4 | - Bring your own laptop
5 |
6 | ## Application
7 | - Built-in Command Line Interface (Linux / MacOS) or 'terminal'
8 | - GitBash (Windows) - download [here](https://gitforwindows.org/)
9 | - Your favorite text editor such as nano/vim/Notepad++/etc. For reference, the hands-on will be using `vim`.
10 |
11 | ## Test your environment
12 | 1. Open your terminal or open GitBash
13 | 2. Open your text editor using their command, e.g.
14 | - Vim: `$ vim`
15 | - Nano: `$ nano`
16 | 3. Open another terminal/GitBash session, make sure you can run these commands:
17 | - `$ grep`
18 | - `$ sed`
19 | - `$ awk`
20 |
21 | If something like this turns up:
22 |
23 | ```shell
24 | $ awk
25 | bash: awk: command not found
26 | ```
27 | it means your environment is missing that command. Troubleshoot and test again.
--------------------------------------------------------------------------------
/Shell Scripting/session-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | marp: true
3 | ---
4 |
5 | # Let’s start!
6 |
7 | ## Shell Script. What is it?
8 |
9 | In the Data Engineering context, a shell script is a powerful tool that enables us to interact with the command-line interface (CLI) of our operating system and execute a series of commands in a sequential manner. It allows us to `automate tasks` and `streamline our data workflows`.
10 |
11 | We will use a shell called `bash`, which is widely used in the industry and offers a rich set of features and functionalities for scripting purposes.
12 |
13 | ---
14 |
15 | ## Understand your Shell Environment.
16 |
17 | - A Terminal: Linux or Mac users can use their default terminal app, while Windows users can use GitBash for the terminal.
18 | 
19 | ---
20 |
21 | ## Let's try it using some basic commands
22 | * $ ls
23 | * $ pwd
24 | * $ cd
25 | * $ echo
26 |
27 | ---
28 | - A Text Editor: It’s really a weapon of choice, you can use your favorite text editor.
29 | We will use `vim` for this course, please adjust it to your preferred text editor.
30 | 
31 |
32 | ---
33 |
34 | - Shell scripting utilities:
35 | - grep
36 | - awk
37 | - sed
38 | - etc.
39 |
40 | ---
--------------------------------------------------------------------------------
/Shell Scripting/source/data-source.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "Nama": "Arnetha_Marchelina",
4 | "Umur": 25
5 | },
6 | {
7 | "Nama": "Sebastian_Cahyo_Ardhi_Iswara",
8 | "Umur": 30,
9 | "alamat": {
10 | "kota": "New York",
11 | "kodepos": "10001"
12 | },
13 | "hobbies": ["reading", "painting", "hiking"]
14 | },
15 | {
16 | "Nama": "Helmi_Aziz_Muhammad",
17 | "Umur": 22
18 | },
19 | {
20 | "Nama": "Muhammad_Ridwan_Maulana",
21 | "Umur": 28,
22 | "alamat": {
23 | "kota": "Jakarta",
24 | "kodepos": "00892"
25 | },
26 | "hobbies": ["swimming", "dancing"]
27 | },
28 | {
29 | "Nama": "Arif_Setiyawan",
30 | "Umur": 24
31 | },
32 | {
33 | "Nama": "Rahmatulloh",
34 | "Umur": 31
35 | },
36 | {
37 | "Nama": "Aidiel_Fitra",
38 | "Umur": 23
39 | },
40 | {
41 | "Nama": "Yusuf_Hanafi_Angkat",
42 | "Umur": 26
43 | },
44 | {
45 | "Nama": "Isharridho_Pratama",
46 | "Umur": 27
47 | },
48 | {
49 | "Nama": "Vincent_Junitio_Ungu",
50 | "Umur": 29
51 | },
52 | {
53 | "Nama": "Hairulloh_Sukur",
54 | "Umur": 33
55 | },
56 | {
57 | "Nama": "Yuandika_Alfahreiza",
58 | "Umur": 20
59 | },
60 | {
61 | "Nama": "Sahala_Josua_Sinaga",
62 | "Umur": 34,
63 | "alamat": {
64 | "kota": "Bangalore",
65 | "kodepos": "4435AA"
66 | },
67 | "hobbies": ["cooking", "photography"]
68 | },
69 | {
70 | "Nama": "Septian_Dwi_Kurnia",
71 | "Umur": 21
72 | },
73 | {
74 | "Nama": "Fitra_Anugrah",
75 | "Umur": 32
76 | },
77 | {
78 | "Nama": "Muhammad_Ridwan_Maulana",
79 | "Umur": 35
80 | }
81 | ]
--------------------------------------------------------------------------------
/Shell Scripting/session-4.md:
--------------------------------------------------------------------------------
1 | ---
2 | marp: true
3 | ---
4 |
5 | # What about JSON files?
6 | ---
7 | # Have you heard of the `jq` library?
8 | - It's a powerful command-line utility and library for processing JSON data in Unix-like environments.
9 | - It is designed to work with JSON data streams and provides a wide range of functionalities for data filtering, formatting, and cleansing.
10 | - `jq` allows you to extract specific data from JSON, manipulate JSON structures, and perform various transformations on the data.
11 |
12 | ---
13 |
14 | # Let's learn together
15 |
16 | ---
17 |
18 | - We will use the same data as before, but in JSON format
19 | ![data-source][data-source]
20 |
21 | ---
22 | # Navigating a JSON file
23 | ## as simple as `jq 'QUERY' data-source.json`
24 | - To view the entire JSON data:
25 | `jq '.' data-source.json`
26 | - To select and display only the "Nama" and "Umur" fields of all records:
27 | `jq '.[] | {Nama, Umur}' data-source.json`
28 | - To filter records with an age greater than 25:
29 | `jq '.[] | select(.Umur > 25)' data-source.json`
30 | - To extract "Nama" and "Umur" fields of records with hobbies:
31 | `jq '.[] | select(.hobbies) | {Nama, Umur}' data-source.json`
32 | ---
33 | - To display the "kota" (city) field from records that have the "alamat" (address) field:
34 | `jq '.[] | select(.alamat) | .alamat.kota' data-source.json`
35 | - To count the number of records:
36 | `jq 'length' data-source.json`
37 | - To find the maximum and minimum age:
38 | `jq 'max_by(.Umur) | .Umur' data-source.json`
39 | `jq 'min_by(.Umur) | .Umur' data-source.json`
40 | - To extract "Nama" and "Umur" fields and format as "Nama berusia Umur"
41 | `jq '.[] | "\(.Nama) berusia \(.Umur)"' data-source.json`
42 | ---
43 |
44 | ## The `map` directive
45 | - used to apply a filter or function to each element of an array. It allows you to perform data transformations or filtering on arrays of JSON objects.
46 | - Let's try to filter records with `Umur` greater than 30
47 | `jq 'map(select(.Umur > 30))' data-source.json`
48 | - Now, we replace the underscores in `Nama` with spaces
49 | `jq 'map(.Nama |= gsub("_"; " "))' data-source.json`
50 |
51 | ---
52 |
53 | ## the `sort` directive
54 | - The `sort_by` function along with the `reverse` function in `jq` are used to sort the JSON data in descending order based on a specific field. The `sort_by` function sorts the JSON data in ascending order by the given field, and then the `reverse` function is used to reverse the order and obtain the descending sort.
55 | `jq 'sort_by(.Umur) | reverse' data-source.json`
56 |
57 | ---
58 |
59 | ## [assignment] Data Processing
60 | ### Tasks
61 | - Using the `data-source.json` file
62 | - Perform data cleansing to replace underscores in the `Nama` column with spaces.
63 | - Perform data extraction to format the output as `'Nama' berusia 'Umur'`.
64 | - Filter and group ages below-25 and 25-and-above.
65 | - Combine those commands into a single shell script file.
66 |
67 | [data-source]: images/4-data-source.png
--------------------------------------------------------------------------------
/Shell Scripting/session-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | marp: true
3 | ---
4 |
5 | # Let's learn
6 | ---
7 |
8 |
9 | # Our first scripts of the day
10 |
11 | ![welcome][welcome]
12 |
13 | ---
14 |
15 | ## welcome.sh
16 | - Explanation
17 | - shebang `#!/bin/bash`
18 | it is used to specify which shell interpreter to be used.
19 | - comment `# this is a comment`
20 | anything after a '#' will not be run as a command.
21 | - echo
22 | we call this command to display output on the terminal, just like the one we used before.
23 | - date
24 | this command is used to retrieve the system's current date and time
25 | ---
26 | ## Execute our script!
27 | - make it `executable` using this command: `$ chmod +x welcome.sh`
28 | - run it using this command: `$ ./welcome.sh`
29 | ---
30 | ## [assignment] welcome-edit.sh
31 |
32 | - copy the welcome.sh into a new file called `welcome-edit.sh`
33 | - modify the new file so the output is similar to this (5 minutes)
34 |
35 | ![welcome-edit][welcome-edit]
36 |
37 | ---
38 |
39 | ## Variables and data types
40 | - In shell scripting, variables are used to store data or values that can be accessed and manipulated throughout the script. Variables can hold various types of data, and the data type is determined implicitly based on the context in which the variable is used. Unlike some programming languages, shell scripting is not strongly typed, meaning you don't need to explicitly define the data type when declaring a variable.
41 |
42 | ---
43 |
44 | - Examples:
45 | - String: A sequence of characters enclosed in single or double quotes. Strings are the most commonly used data type in shell scripting.
46 | - Integer: Whole numbers without any decimal points.
47 | - Floating-Point: Numbers with decimal points.
48 | - Array: A collection of values accessible by their indices. Shell scripting supports one-dimensional arrays.
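49 | - Boolean: Shell scripting doesn't have a native Boolean data type. Instead, booleans are commonly represented using integer values, where 0 represents false, and any non-zero value represents true (this is how arithmetic evaluation with `(( ))` treats numbers). Note that command exit statuses work the other way around: an exit status of 0 means success.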
50 | - Null: Represents the absence of a value or an undefined variable.
51 | ---
52 |
53 | - It's important to note that shell scripting is loosely typed, meaning variables can change their data type during execution. For example, a variable that initially stores an integer value can later hold a string value without any explicit type conversion.
54 |
55 | - In shell scripts, you can access the value of a variable using the dollar sign `$` followed by the variable name. For example, `$name` will give you the value of the name variable, and `${fruits[0]}` will give you the first element of the `fruits` array.
56 | ---
57 | - Check this out:
58 | ![variables][variables]
59 |
60 | - Try to output each one with `$ echo $variable_name` (for example, `$ echo $name`)!
61 | ---
62 | ## Basic control structures
63 | ### if-else
64 | - In this script, we prompt the user to enter a number using the read command. Then, we use an if-else construct to check if the number is greater than 10. If the condition is true, we display a message indicating that the number is greater than 10. Otherwise, we display a message indicating that the number is less than or equal to 10.
65 | ![if-else][if-else]
66 |
67 | ---
68 |
69 | ### if-else-args
70 | - copy the if-else.sh into a new file called `if-else-args.sh`
71 | - comment the 5th line
72 | - change the 7th line into this: `if [ $1 -gt 10 ]; then`
73 |
74 | ![if-else-args][if-else-args]
75 |
76 |
77 | ---
78 | ### for
79 | - In this script, we use a for loop to iterate over a sequence of numbers from 1 to 5. Within each iteration, we display a message indicating the current iteration number.
80 | - Loops allow us to automate repetitive tasks, such as processing multiple files, iterating over database records, or performing calculations on a range of values. They enhance the efficiency and productivity of our data engineering workflows.
81 |
82 | ![for][for]
83 |
84 | ---
85 |
86 | ### [assignment] if-else-args-loop
87 | - copy the if-else-args.sh into a new file called `if-else-args-loop.sh`
88 | - modify the new file so the output is similar to this (10 minutes)
89 | - hint: use `"$@"` to loop over arguments
90 |
91 | ![if-else-args-loop][if-else-args-loop]
92 |
93 | [welcome]: images/2-welcome.png
94 | [welcome-edit]: images/2-welcome-edit.png
95 | [if-else-args-loop]: images/2-if-else-args-loop.png
96 | [variables]: images/2-variables.png
97 | [if-else]: images/2-if-else.png
98 | [if-else-args]: images/2-if-else-args.png
99 | [for]: images/2-for.png
100 |
101 |
--------------------------------------------------------------------------------
/Shell Scripting/session-3.md:
--------------------------------------------------------------------------------
1 | ---
2 | marp: true
3 | ---
4 |
5 | # Moving on!
6 | - Let's try to do some data processing!
7 | - make sure `sed`, `grep`, and `awk` are available in your environment
8 | - these text-based tools are fast and remain efficient even on large amounts of text
9 | ---
10 |
11 | # Let's learn
12 |
13 | ---
14 |
15 | ## The Stream Editor `sed`
16 | - Allows you to perform text transformations on an input stream (a file or input from a pipeline) and then output the modified stream. It is commonly used for search and replace operations, among other text manipulations.
17 |
18 | ---
19 |
20 | - Let's use this data and save it to a file called `data-sed.csv`
21 | ```
22 | Nama, Usia, Kota
23 | Yusuf,17,New York
24 | Zalmawati,25,Perth
25 | Reza,28,Bekasi
26 | ```
27 |
28 | - Our goal is to replace "Perth" with "Jakarta" in the Kota column. We can achieve this using the `sed` command as follows:
29 | `$ sed 's/Perth/Jakarta/' data-sed.csv > modified_data.csv`
30 |
31 | ---
32 |
33 | - Explanation:
34 | - `sed`: Invokes the sed command.
35 | - `s/Perth/Jakarta/`: This is the search and replace pattern. The `s` stands for substitute, and we're searching for "Perth" and replacing it with "Jakarta".
36 | - `data-sed.csv`: Specifies the input file, i.e., the file to perform the search and replace operation on.
37 | - `> modified_data.csv`: Redirects the modified output to a new file named `modified_data.csv`.
38 |
39 | ---
40 |
41 | - Pro Tip:
42 | - You can also stream any output to `sed` to immediately change the value
43 | - You can use `regex` to perform even more powerful `search and replace` operations
44 |
45 | ---
46 |
47 | ## The Data Extractor `awk`
48 | - Operates on a per-line basis, reading input line by line and applying patterns and actions to process the data. `awk` excels at working with structured data, making it a great choice for parsing CSV files and performing various data manipulations.
49 |
50 | ---
51 | - Let's use the previous data file and copy it to a new file called `data-awk.csv`
52 | - We will try to extract the data using the format that we want:
`$ awk -F ',' 'NR > 1 {print $1 " berusia " $2 " tahun."}' data-awk.csv`
53 |
54 | ![awk-1][awk-1]
55 |
56 | ---
57 | - Explanation:
58 | - `-F ','`: This option specifies the field separator for awk. In this case, we set it to ',' to indicate that the CSV file uses commas as separators between fields.
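59 | - `'...'`: The single quotes enclose the awk program. Inside it, the `NR > 1` pattern makes `awk` skip the first line (the header row) and run the action only on the data lines.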
60 | - `print $1 " berusia " $2 " tahun."`: This is the action part of the `awk` program. It prints the desired output, which consists of the first field ($1, representing the `Nama` column) and the second field ($2, representing the `Usia` column)
61 | - In this example, `awk` processes each line of the CSV file and prints the extracted information in the desired format.
62 |
63 | ---
64 | - Pro tip
65 | - `awk` can also perform conditional statements, loops, and calculations on data
66 | - You can use `regex` to perform even more powerful `data transformation` and `data cleaning` operations
67 | ---
68 | ## The log file's best friend `grep`
69 | - Perfect for searching and filtering text based on patterns, including regular expressions.
70 | - It is commonly used to extract specific lines or patterns from files that match a given search criteria.
71 |
72 | ---
73 | - Let's use this data and save it to a file called `logfile.log`
74 | ```
75 | 2022-01-01,INFO,User logged in
76 | 2022-01-01,ERROR,Invalid input detected
77 | 2022-01-02,WARNING,Disk space low
78 | 2022-01-02,ERROR,Database connection failed
79 | 2022-01-03,INFO,User logged out
80 | ```
81 |
82 | - find all lines containing `ERROR` in the log file using this command:
83 | `$ grep "ERROR" logfile.log`
84 |
85 | - Explanation:
86 | - `grep "ERROR"`: This command searches for lines containing the word `ERROR` in the file `logfile.log` and prints those lines.
87 |
88 | ---
89 | - Now, let's use a regular expression to find lines that start with `2022-01-01`:
90 | `$ grep "^2022-01-01" logfile.log`
91 | - Explanation:
92 | - `grep "^2022-01-01"`: This command searches for lines that start with `2022-01-01` in the file `logfile.log` and prints those lines.
93 |
94 | ---
95 | ## [assignment] Data Processing
96 | ### Use this csv file, save it as `data-source.csv`
97 | ```
98 | Nama,Umur
99 | Arnetha_Marchelina,25
100 | Sebastian_Cahyo_Ardhi_Iswara,30
101 | Helmi_Aziz_Muhammad,22
102 | Muhammad_Ridwan_Maulana,28
103 | Arif_Setiyawan,24
104 | Rahmatulloh,31
105 | Aidiel_Fitra,23
106 | Yusuf_Hanafi_Angkat,26
107 | Isharridho_Pratama,27
108 | Vincent_Junitio_Ungu,29
109 | Hairulloh_Sukur,33
110 | Yuandika_Alfahreiza,20
111 | Sahala_Josua_Sinaga,34
112 | Septian_Dwi_Kurnia,21
113 | Fitra_Anugrah,32
114 | Muhammad_Ridwan_Maulana,35
115 | ```
116 | ---
117 | ### Tasks
118 | - Perform data cleansing using `sed` to replace underscores in the `Nama` column with spaces.
119 | - Perform data extraction using `awk` to format the output as `'Nama' berusia 'Umur'`.
120 | - Use `grep` to filter and group ages below-25 and 25-and-above.
121 | - Combine those commands into a single shell script file.
122 | - Clean your code, give comments, and output echoes as much as you can to help you test and troubleshoot.
123 | - Good luck!
124 |
125 | [sed-1]: images/3-sed-1.png
126 | [awk-1]: images/3-awk-1.png
--------------------------------------------------------------------------------
/Python Programming/data-warehouse/sales_data_cleaned.csv:
--------------------------------------------------------------------------------
1 | country,type,channel,priority,date,id,units_sold,unit_price
2 | Tuvalu,Baby Food,Offline,H,2010-05-28,669165933,9925.0,255.28
3 | Sao Tome and Principe,Fruits,Online,C,2014-06-20,514321792,8102.0,9.33
4 | Angola,Household,Offline,M,2011-04-23,135425221,4187.0,668.27
5 | Burkina Faso,Vegetables,Online,H,2012-07-17,871543967,8082.0,154.06
6 | Republic of the Congo,Personal Care,Offline,M,2015-07-14,770463311,6070.0,81.73
7 | Senegal,Cereal,Online,H,2014-04-18,616607081,6593.0,205.7
8 | Kyrgyzstan,Vegetables,Online,H,2011-06-24,814711606,124.0,154.06
9 | Cape Verde,Clothes,Offline,H,2014-08-02,939825713,4168.0,109.28
10 | Bangladesh,Clothes,Online,L,2017-01-13,187310731,8263.0,109.28
11 | Honduras,Household,Offline,H,2017-02-08,522840487,8974.0,668.27
12 | Mongolia,Personal Care,Offline,C,2014-02-19,832401311,4901.0,81.73
13 | Bulgaria,Clothes,Online,M,2012-04-23,972292029,1673.0,109.28
14 | Sri Lanka,Cosmetics,Offline,M,2016-11-19,419123971,6952.0,437.2
15 | Cameroon,Beverages,Offline,C,2015-04-01,519820964,5430.0,47.45
16 | Turkmenistan,Household,Offline,L,2010-12-30,441619336,3830.0,668.27
17 | East Timor,Meat,Online,L,2012-07-31,322067916,5908.0,421.89
18 | Norway,Baby Food,Online,L,2014-05-14,819028031,7450.0,255.28
19 | Portugal,Baby Food,Online,H,2015-07-31,860673511,1273.0,255.28
20 | Honduras,Snacks,Online,L,2016-06-30,795490682,2225.0,152.58
21 | New Zealand,Fruits,Online,H,2014-09-08,142278373,2187.0,9.33
22 | Moldova ,Personal Care,Online,L,2016-05-07,740147912,5070.0,81.73
23 | France,Cosmetics,Online,H,2017-05-22,898523128,1815.0,437.2
24 | Kiribati,Fruits,Online,M,2014-10-13,347140347,5398.0,9.33
25 | Mali,Fruits,Online,L,2010-05-07,686048400,5822.0,9.33
26 | Norway,Beverages,Offline,C,2014-07-18,435608613,5124.0,47.45
27 | The Gambia,Household,Offline,L,2012-05-26,886494815,2370.0,668.27
28 | Switzerland,Cosmetics,Offline,M,2012-09-17,249693334,8661.0,437.2
29 | South Sudan,Personal Care,Offline,C,2013-12-29,406502997,2125.0,81.73
30 | Australia,Office Supplies,Online,C,2015-10-27,158535134,2924.0,651.21
31 | Myanmar,Household,Offline,H,2015-01-16,177713572,8250.0,668.27
32 | Djibouti,Snacks,Online,M,2017-02-25,756274640,7327.0,152.58
33 | Costa Rica,Personal Care,Offline,L,2017-05-08,456767165,6409.0,81.73
34 | Syria,Fruits,Online,L,2011-11-22,162052476,3784.0,9.33
35 | The Gambia,Meat,Online,M,2017-01-14,825304400,4767.0,421.89
36 | Brunei,Office Supplies,Online,L,2012-04-01,320009267,6708.0,651.21
37 | Bulgaria,Office Supplies,Online,M,2012-02-16,189965903,3987.0,651.21
38 | Niger,Personal Care,Online,H,2017-03-11,699285638,3015.0,81.73
39 | Azerbaijan,Cosmetics,Online,M,2010-02-06,382392299,7234.0,437.2
40 | The Gambia,Cereal,Offline,H,2012-06-07,994022214,2117.0,205.7
41 | Slovakia,Vegetables,Online,H,2012-10-06,759224212,171.0,154.06
42 | Myanmar,Clothes,Online,H,2015-11-14,223359620,5930.0,109.28
43 | Comoros,Cereal,Offline,H,2016-03-29,902102267,962.0,205.7
44 | Iceland,Cosmetics,Online,C,2016-12-31,331438481,8867.0,437.2
45 | Switzerland,Personal Care,Online,M,2010-12-23,617667090,273.0,81.73
46 | Macedonia,Clothes,Offline,C,2014-10-14,787399423,7842.0,109.28
47 | Mauritania,Office Supplies,Offline,C,2012-01-11,837559306,1266.0,651.21
48 | Albania,Clothes,Online,C,2010-02-02,385383069,2269.0,109.28
49 | Lesotho,Fruits,Online,L,2013-08-18,918419539,9606.0,9.33
50 | Saudi Arabia,Cereal,Online,M,2013-03-25,844530045,4063.0,205.7
51 | Sierra Leone,Office Supplies,Offline,M,2011-11-26,441888415,3457.0,651.21
52 | Sao Tome and Principe,Fruits,Offline,H,2013-09-17,508980977,7637.0,9.33
53 | Cote d'Ivoire,Clothes,Online,C,2012-06-08,114606559,3482.0,109.28
54 | Fiji,Clothes,Offline,C,2010-06-30,647876489,9905.0,109.28
55 | Austria,Cosmetics,Offline,H,2015-02-23,868214595,2847.0,437.2
56 | United Kingdom,Household,Online,L,2012-01-05,955357205,282.0,668.27
57 | Djibouti,Cosmetics,Offline,H,2014-04-07,259353148,7215.0,437.2
58 | Australia,Cereal,Offline,H,2013-06-09,450563752,682.0,205.7
59 | San Marino,Baby Food,Online,L,2013-06-26,569662845,4750.0,255.28
60 | Cameroon,Office Supplies,Online,M,2011-11-07,177636754,5518.0,651.21
61 | Libya,Clothes,Offline,H,2010-10-30,705784308,6116.0,109.28
62 | Haiti,Cosmetics,Offline,H,2013-10-13,505716836,1705.0,437.2
63 | Rwanda,Cosmetics,Offline,H,2013-10-11,699358165,4477.0,437.2
64 | Gabon,Personal Care,Offline,L,2012-07-08,228944623,8656.0,81.73
65 | Belize,Clothes,Offline,M,2016-07-25,807025039,5498.0,109.28
66 | Lithuania,Office Supplies,Offline,H,2010-10-24,166460740,8287.0,651.21
67 | Madagascar,Clothes,Offline,L,2015-04-25,610425555,7342.0,109.28
68 | Turkmenistan,Office Supplies,Online,M,2013-04-23,462405812,5010.0,651.21
69 | Libya,Fruits,Online,L,2015-08-14,816200339,673.0,9.33
70 | Democratic Republic of the Congo,Beverages,Online,C,2011-05-26,585920464,5741.0,47.45
71 | Djibouti,Cereal,Online,H,2017-05-20,555990016,8656.0,205.7
72 | Pakistan,Cosmetics,Offline,L,2013-07-05,231145322,9892.0,437.2
73 | Mexico,Household,Offline,C,2014-11-06,986435210,6954.0,668.27
74 | Federated States of Micronesia,Beverages,Online,C,2014-10-28,217221009,9379.0,47.45
75 | Laos,Vegetables,Offline,C,2011-09-15,789176547,3732.0,154.06
76 | Monaco,Baby Food,Offline,H,2012-05-29,688288152,8614.0,255.28
77 | Samoa ,Cosmetics,Online,H,2013-07-20,670854651,9654.0,437.2
78 | Spain,Household,Offline,L,2012-10-21,213487374,4513.0,668.27
79 | Lebanon,Clothes,Online,L,2012-09-18,663110148,7884.0,109.28
80 | Iran,Cosmetics,Online,H,2016-11-15,286959302,6489.0,437.2
81 | Zambia,Snacks,Online,L,2011-01-04,122583663,4085.0,152.58
82 | Kenya,Vegetables,Online,L,2012-03-18,827844560,6457.0,154.06
83 | Mexico,Personal Care,Offline,L,2012-02-17,430915820,6422.0,81.73
84 | Sao Tome and Principe,Beverages,Offline,C,2011-01-16,180283772,8829.0,47.45
85 | The Gambia,Baby Food,Offline,M,2014-02-03,494747245,5559.0,255.28
86 | Kuwait,Fruits,Online,M,2012-04-30,513417565,522.0,9.33
87 | Slovenia,Beverages,Offline,C,2016-10-23,345718562,4660.0,47.45
88 | Sierra Leone,Office Supplies,Offline,H,2016-12-06,621386563,948.0,651.21
89 | Australia,Beverages,Offline,H,2014-07-07,240470397,9389.0,47.45
90 | Azerbaijan,Office Supplies,Online,M,2012-06-13,423331391,2021.0,651.21
91 | Romania,Cosmetics,Online,H,2010-11-26,660643374,7910.0,437.2
92 | Nicaragua,Beverages,Offline,C,2011-02-08,963392674,8156.0,47.45
93 | Mali,Clothes,Online,M,2011-07-26,512878119,888.0,109.28
94 | Malaysia,Fruits,Offline,L,2011-11-11,810711038,6267.0,9.33
95 | Sierra Leone,Vegetables,Offline,C,2016-06-01,728815257,1485.0,154.06
96 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767.0,81.73
97 | Mozambique,Household,Offline,L,2012-02-10,665095412,5367.0,668.27
98 | Indonesia,Fruits,Online,H,2012-08-22,669165412,97.0,9925.0
99 |
--------------------------------------------------------------------------------
/Python Programming/sources/sales_data.csv:
--------------------------------------------------------------------------------
1 | country,type,channel,priority,date,id,units_sold,unit_price
2 | Tuvalu,Baby Food,Offline,H,2010-05-28,669165933,9925,255.28
3 | Grenada,Cereal,Online,C,2012-08-22,963881480,,205.7
4 | Russia,,Offline,L,2014-05-02,341417157,1779,651.21
5 | Sao Tome and Principe,Fruits,Online,C,2014-06-20,514321792,8102,9.33
6 | Rwanda,Office Supplies,Offline,L,,115456712,5062,651.21
7 | ,Baby Food,Online,C,2015-02-04,547995746,2974,255.28
8 | Angola,Household,Offline,M,2011-04-23,135425221,4187,668.27
9 | Burkina Faso,Vegetables,Online,H,2012-07-17,871543967,8082,154.06
10 | Republic of the Congo,Personal Care,Offline,M,2015-07-14,770463311,6070,81.73
11 | Senegal,Cereal,Online,H,2014-04-18,616607081,6593,205.7
12 | Kyrgyzstan,Vegetables,Online,H,2011-06-24,814711606,124,154.06
13 | Cape Verde,Clothes,Offline,H,2014-08-02,939825713,4168,109.28
14 | Bangladesh,Clothes,Online,L,2017-01-13,187310731,8263,109.28
15 | Honduras,Household,Offline,H,2017-02-08,522840487,8974,668.27
16 | Mongolia,Personal Care,Offline,C,2014-02-19,832401311,4901,81.73
17 | Bulgaria,Clothes,Online,M,2012-04-23,972292029,1673,109.28
18 | Sri Lanka,Cosmetics,Offline,M,2016-11-19,419123971,6952,437.2
19 | Cameroon,Beverages,Offline,C,2015-04-01,519820964,5430,47.45
20 | Turkmenistan,Household,Offline,L,2010-12-30,441619336,3830,668.27
21 | East Timor,Meat,Online,L,2012-07-31,322067916,5908,421.89
22 | Norway,Baby Food,Online,L,2014-05-14,819028031,7450,255.28
23 | Portugal,Baby Food,Online,H,2015-07-31,860673511,1273,255.28
24 | Honduras,Snacks,Online,L,2016-06-30,795490682,2225,152.58
25 | New Zealand,Fruits,Online,H,2014-09-08,142278373,2187,9.33
26 | Moldova ,Personal Care,Online,L,2016-05-07,740147912,5070,81.73
27 | France,Cosmetics,Online,H,2017-05-22,898523128,1815,437.2
28 | Kiribati,Fruits,Online,M,2014-10-13,347140347,5398,9.33
29 | Mali,Frutis,Online,L,2010-05-07,686048400,5822,9.33
30 | Norway,Beverages,Offline,C,2014-07-18,435608613,5124,47.45
31 | The Gambia,Household,Offline,L,2012-05-26,886494815,2370,668.27
32 | Switzerland,Cosmetics,Offline,M,2012-09-17,249693334,8661,437.2
33 | South Sudan,Personal Care,Offline,C,2013-12-29,406502997,2125,81.73
34 | Australia,Office Supplies,Online,C,2015-10-27,158535134,2924,651.21
35 | Myanmar,Household,Offline,H,2015-01-16,177713572,8250,668.27
36 | Djibouti,Snacks,Online,M,2017-02-25,756274640,7327,152.58
37 | Costa Rica,Personal Care,Offline,L,2017-05-08,456767165,6409,81.73
38 | Syria,Fruits,Online,L,2011-11-22,162052476,3784,9.33
39 | The Gambia,Meat,Online,M,2017-01-14,825304400,4767,421.89
40 | Brunei,Office Supplies,Online,L,2012-04-01,320009267,6708,651.21
41 | Bulgaria,Office Supplies,Online,M,2012-02-16,189965903,3987,651.21
42 | Niger,Personal Care,Online,H,2017-03-11,699285638,3015,81.73
43 | Azerbaijan,Cosmetics,Online,M,2010-02-06,382392299,7234,437.2
44 | The Gambia,Cereal,Offline,H,2012-06-07,994022214,2117,205.7
45 | Slovakia,Vegetables,Online,H,2012-10-06,759224212,171,154.06
46 | Myanmar,Clothes,Online,H,2015-11-14,223359620,5930,109.28
47 | Comoros,Cereal,Offline,H,2016-03-29,902102267,962,205.7
48 | Iceland,Cosmetics,Online,C,2016-12-31,331438481,8867,437.2
49 | Switzerland,Personal Care,Online,M,2010-12-23,617667090,273,81.73
50 | Macedonia,Clothes,Offline,C,2014-10-14,787399423,7842,109.28
51 | Mauritania,Office Supplies,Offline,C,2012-01-11,837559306,1266,651.21
52 | Albania,Clothes,Online,C,2010-02-02,385383069,2269,109.28
53 | Lesotho,Fruits,Online,L,2013-08-18,918419539,9606,9.33
54 | Saudi Arabia,Cereal,Online,M,2013-03-25,844530045,4063,205.7
55 | Sierra Leone,Office Supplies,Offline,M,2011-11-26,441888415,3457,651.21
56 | Sao Tome and Principe,Fruits,Offline,H,2013-09-17,508980977,7637,9.33
57 | Cote d'Ivoire,Clothes,Online,C,2012-06-08,114606559,3482,109.28
58 | Fiji,Clothes,Offline,C,2010-06-30,647876489,9905,109.28
59 | Austria,Cosmetics,Offline,H,2015-02-23,868214595,2847,437.2
60 | United Kingdom,Household,Online,L,2012-01-05,955357205,282,668.27
61 | Djibouti,Cosmetics,Offline,H,2014-04-07,259353148,7215,437.2
62 | Australia,Cereal,Offline,H,2013-06-09,450563752,682,205.7
63 | San Marino,Baby Food,Online,L,2013-06-26,569662845,4750,255.28
64 | Cameroon,Office Supplies,Online,M,2011-11-07,177636754,5518,651.21
65 | Libya,Clothes,Offline,H,2010-10-30,705784308,6116,109.28
66 | Haiti,Cosmetics,Offline,H,2013-10-13,505716836,1705,437.2
67 | Rwanda,Cosmetics,Offline,H,2013-10-11,699358165,4477,437.2
68 | Gabon,Personal Care,Offline,L,2012-07-08,228944623,8656,81.73
69 | Belize,Clothes,Offline,M,2016-07-25,807025039,5498,109.28
70 | Lithuania,Office Supplies,Offline,H,2010-10-24,166460740,8287,651.21
71 | Madagascar,Clothes,Offline,L,2015-04-25,610425555,7342,109.28
72 | Turkmenistan,Office Supplies,Online,M,2013-04-23,462405812,5010,651.21
73 | Libya,Fruits,Online,L,2015-08-14,816200339,673,9.33
74 | Democratic Republic of the Congo,Beverages,Online,C,2011-05-26,585920464,5741,47.45
75 | Djibouti,Cereal,Online,H,2017-05-20,555990016,8656,205.7
76 | Pakistan,Cosmetics,Offline,L,2013-07-05,231145322,9892,437.2
77 | Mexico,Household,Offline,C,2014-11-06,986435210,6954,668.27
78 | Federated States of Micronesia,Beverages,Online,C,2014-10-28,217221009,9379,47.45
79 | Laos,Vegetables,Offline,C,2011-09-15,789176547,3732,154.06
80 | Monaco,Baby Food,Offline,H,2012-05-29,688288152,8614,255.28
81 | Samoa ,Cosmetics,Online,H,2013-07-20,670854651,9654,437.2
82 | Spain,Household,Offline,L,2012-10-21,213487374,4513,668.27
83 | Lebanon,Clothes,Online,L,2012-09-18,663110148,7884,109.28
84 | Iran,Cosmetics,Online,H,2016-11-15,286959302,6489,437.2
85 | Zambia,Snacks,Online,L,2011-01-04,122583663,4085,152.58
86 | Kenya,Vegetables,Online,L,2012-03-18,827844560,6457,154.06
87 | Mexico,Personal Care,Offline,L,2012-02-17,430915820,6422,81.73
88 | Sao Tome and Principe,Beverages,Offline,C,2011-01-16,180283772,8829,47.45
89 | The Gambia,Baby Food,Offline,M,2014-02-03,494747245,5559,255.28
90 | Kuwait,Fruits,Online,M,2012-04-30,513417565,522,9.33
91 | Slovenia,Beverages,Offline,C,2016-10-23,345718562,4660,47.45
92 | Sierra Leone,Office Supplies,Offline,H,2016-12-06,621386563,948,651.21
93 | Australia,Beverages,Offline,H,2014-07-07,240470397,9389,47.45
94 | Azerbaijan,Office Supplies,Online,M,2012-06-13,423331391,2021,651.21
95 | Romania,Cosmetics,Online,H,2010-11-26,660643374,7910,437.2
96 | Nicaragua,Beverages,Offline,C,2011-02-08,963392674,8156,47.45
97 | Mali,Clothes,Online,M,2011-07-26,512878119,888,109.28
98 | Malaysia,Fruits,Offline,L,2011-11-11,810711038,6267,9.33
99 | Sierra Leone,Vegetables,Offline,C,2016-06-01,728815257,1485,154.06
100 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767,81.73
101 | Mozambique,Household,Offline,L,2012-02-10,665095412,5367,668.27
102 | Indonesia,Fruits,Online,H,2012-08-22,669165412,97,9925
103 | Mexico,Personal Care,Offline,M,2015-07-30,559427106,5767,81.73
104 |
--------------------------------------------------------------------------------
/IntroSQL/README.md:
--------------------------------------------------------------------------------
1 | # Hands-On Material for the Intro SQL Workshop
2 | This repository contains the source code for the Intro SQL workshop using PostgreSQL.
3 |
4 | ## Prerequisite
5 | - PostgreSQL version 14 or above, including pgAdmin. [Download](https://www.enterprisedb.com/downloads/postgres-postgresql-downloads)
6 | - DBeaver. [Download](https://dbeaver.io/download/)
7 | - VSCode (optional). [Download](https://code.visualstudio.com/download)
8 |
9 |
10 |
11 | ## 1) Create New Database Connection
12 | - Open DBeaver
13 | - Click New Database Connection
14 | - Choose PostgreSQL then click Next
15 | - Input Host: localhost, Database: postgres, Username: postgres, Password: your DB password, then click Finish
16 | 
17 |
18 |
19 |
20 | ## 2) Create New Database
21 | - Right-click the `postgres` database, then click SQL Editor --> Open SQL script
22 | 
23 | - Type the query below, then click Execute SQL Query
24 | ```
25 | CREATE DATABASE demo_intro_sql;
26 | ```
27 |
28 |
29 | ## 3) Hands On DDL
30 | ### 3.1 CREATE Table
31 | - Type the query below in SQL Editor to create a new table
32 | ```
33 | CREATE TABLE book(
34 | book_id SERIAL PRIMARY KEY,
35 | book_name VARCHAR(50),
36 | book_category VARCHAR(15),
37 | qty INT,
38 | unit_price REAL
39 | );
40 | ```
41 |
42 |
43 | ### 3.2 Alter Table
44 | #### 3.2.1 ALTER Table Add Column
45 | - Type the query below in SQL Editor
46 | ```
47 | ALTER TABLE book
48 | ADD COLUMN created_dt DATE,
49 | ADD COLUMN changed_dt DATE,
50 | ADD COLUMN to_be_deleted INT;
51 | ```
52 | #### 3.2.2 ALTER Table Drop Column
53 | - Type the query below in SQL Editor
54 | ~~~
55 | ALTER TABLE book
56 | DROP COLUMN created_dt,
57 | DROP COLUMN changed_dt;
58 | ~~~
59 | #### 3.2.3 ALTER Table Rename Column
60 | - Type the query below in SQL Editor
61 | ~~~
62 | ALTER TABLE book
63 | RENAME COLUMN qty TO unit_qty;
64 | ~~~
65 | #### 3.2.4 ALTER Table Modify Datatype
66 | - Type the query below in SQL Editor
67 | ```
68 | ALTER TABLE book
69 | ALTER COLUMN to_be_deleted TYPE VARCHAR;
70 | ```
71 |
72 |
73 | ### 3.3 TRUNCATE Table
74 | - Type the query below in SQL Editor
75 | ```
76 | TRUNCATE TABLE book;
77 | ```
78 |
79 |
80 | ## 4) Hands On DML
81 | ### 4.1 INSERT Data
82 | - Download Example Database northwind [here](https://github.com/pthom/northwind_psql/blob/master/northwind.sql).
83 | - Open northwind.sql in DBeaver, then run all of its queries
84 | - Type the query below in SQL Editor
85 | ```
86 | INSERT INTO categories VALUES (9, 'Fruits', 'Fruits like Banana, mango, Apple etc', '\x');
87 | ```
88 |
89 |
90 | ### 4.2 UPDATE Data
91 | - Retrieve all data in the products table using the query below.
92 | ```
93 | SELECT * FROM products;
94 | ```
95 | - Several products have a stock of 0, so we need to update them to a non-zero value. In this workshop, we set their stock to 20. Type the UPDATE query below:
96 | ```
97 | UPDATE products
98 | SET units_in_stock = 20
99 | WHERE units_in_stock = 0
100 | RETURNING *;
101 | ```
102 | 
103 |
104 |
105 | ### 4.3 DELETE Data
106 | - Type the query below in SQL Editor
107 | ```
108 | DELETE FROM categories
109 | WHERE category_name = 'Fruits'
110 | RETURNING *;
111 | ```
112 |
113 |
114 |
115 | ## 5) Hands On DQL
116 | ### 5.1 Basic Select
117 | ```
118 | SELECT * FROM products;
119 | ```
120 | ### 5.2 Select with Where clause
121 | ```
122 | SELECT * FROM products
123 | WHERE unit_price > 10
124 | ```
125 | ### 5.3 Select with sorting column
126 | ```
127 | SELECT * FROM products
128 | WHERE unit_price > 10
129 | ORDER BY units_in_stock DESC
130 | ```
131 | ### 5.4 Limit Retrieve Data
132 | - Retrieve Top 10 Data
133 | ```
134 | SELECT * FROM products
135 | WHERE unit_price > 10
136 | ORDER BY units_in_stock DESC
137 | LIMIT 10
138 | ```
139 | - Retrieve the top 10 rows after skipping the first 2 records
140 | ```
141 | SELECT * FROM products
142 | WHERE unit_price > 10
143 | ORDER BY units_in_stock DESC
144 | LIMIT 10 OFFSET 2
145 | ```
146 | ### 5.5 Display Unique Data
147 | ```
148 | SELECT DISTINCT category_id
149 | FROM products
150 | ```
151 | ### 5.6 Aggregate Function
152 | - Display maximum value of unit_price in products table
153 | ```
154 | SELECT max(unit_price) as max_unit_price FROM products
155 | ```
156 | - Display minimum value of unit_price in products table
157 | ```
158 | SELECT min(unit_price) as min_unit_price FROM products
159 | ```
160 |
161 | - Display total data in products table
162 | ```
163 | SELECT count(*) as total_data FROM products
164 | ```
165 |
166 | - Display total of unit_price in products table
167 | ```
168 | SELECT sum(unit_price) as sum_unit_price FROM products
169 | ```
170 |
171 | - Display average of unit_price in products table
172 | ```
173 | SELECT avg(unit_price) as average_unit_price FROM products
174 | ```
175 |
176 | ### 5.7 Grouping Data
177 | - Display Top 10 product name and maximum unit price with grouping by product name, sorting with highest maximum unit price
178 | ```
179 | SELECT product_name, max(unit_price) as max_unit_price
180 | FROM products
181 | GROUP BY product_name
182 | ORDER BY max_unit_price DESC
183 | LIMIT 10
184 | ```
185 |
186 | - Display Top 10 product name and maximum unit price with grouping by product name, sorting with highest maximum unit price and also maximum unit price more than 50
187 | ```
188 | SELECT product_name, max(unit_price) as max_unit_price
189 | FROM products
190 | GROUP BY product_name
191 | HAVING max(unit_price) > 50
192 | ORDER BY max_unit_price DESC
193 | LIMIT 10
194 | ```
195 |
196 | ### 5.8 JOIN
197 | - Display territory and region using JOIN
198 | ```
199 | SELECT t.territory_description, r.region_description
200 | FROM territories t, region r
201 | WHERE t.region_id = r.region_id
202 | ```
203 | ```
204 | SELECT t.territory_description, r.region_description
205 | FROM territories t INNER JOIN region r
206 | ON t.region_id = r.region_id
207 | ```
207 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using INNER JOIN
208 | ```
209 | SELECT
210 | p.product_name,
211 | o.order_date,
212 | od.unit_price,
213 | od.quantity,
214 | (od.unit_price * od.quantity) total_price
215 | FROM orders o
216 | INNER JOIN order_details od
217 | ON o.order_id = od.order_id
218 | INNER JOIN products p
219 | ON od.product_id = p.product_id
220 | GROUP BY
221 | p.product_name,
222 | o.order_date,
223 | od.unit_price,
224 | od.quantity
225 | ORDER BY total_price desc
226 | LIMIT 10
227 | ```
228 | 
229 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using LEFT JOIN
230 | - Add 1 record in orders table only
231 | ```
232 | INSERT INTO orders
233 | VALUES (11078, 'SIMOB', 7, '1998-05-06', '1998-06-03', NULL, 2, 18.4400005, 'Simons bistro', 'Vinbæltet 34', 'Kobenhavn', NULL, '1734', 'Denmark');
234 | ```
235 | - Run the query below
236 | ```
237 | SELECT
238 | p.product_name,
239 | o.order_date,
240 | od.unit_price,
241 | od.quantity,
242 | (od.unit_price * od.quantity) total_price
243 | FROM orders o
244 | LEFT JOIN order_details od
245 | ON o.order_id = od.order_id
246 | LEFT JOIN products p
247 | ON od.product_id = p.product_id
248 | GROUP BY
249 | p.product_name,
250 | o.order_date,
251 | od.unit_price,
252 | od.quantity
253 | ORDER BY total_price DESC
254 | LIMIT 10
255 | ```
256 | 
257 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using RIGHT JOIN
258 | - Add 1 record in products table only
259 | ```
260 | INSERT INTO products
261 | VALUES (78, 'Original Computer', 13, 2, '12 boxes', 13, 32, 0, 15, 0);
262 | ```
263 | - Run the query below
264 | ```
265 | SELECT
266 | p.product_name,
267 | o.order_date,
268 | od.unit_price,
269 | od.quantity,
270 | (od.unit_price * od.quantity) total_price
271 | FROM orders o
272 | RIGHT JOIN order_details od
273 | ON o.order_id = od.order_id
274 | RIGHT JOIN products p
275 | ON od.product_id = p.product_id
276 | GROUP BY
277 | p.product_name,
278 | o.order_date,
279 | od.unit_price,
280 | od.quantity
281 | ORDER BY total_price DESC
282 | LIMIT 10
283 | ```
284 | 
285 | - Display top 10 product name, order date, price, quantity and total price with sort by highest total price using FULL JOIN
286 | ```
287 | SELECT
288 | p.product_name,
289 | o.order_date,
290 | od.unit_price,
291 | od.quantity,
292 | (od.unit_price * od.quantity) total_price
293 | FROM orders o
294 | FULL JOIN order_details od
295 | ON o.order_id = od.order_id
296 | FULL JOIN products p
297 | ON od.product_id = p.product_id
298 | GROUP BY
299 | p.product_name,
300 | o.order_date,
301 | od.unit_price,
302 | od.quantity
303 | ORDER BY total_price DESC
304 | LIMIT 10
305 | ```
306 | 
307 |
308 |
309 |
310 | ## 6) Challenge Question
311 | Please click this [link](./challenges.md)
312 |
313 |
--------------------------------------------------------------------------------
/Python Programming/python-basic.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "### Docs : https://docs.google.com/presentation/d/1LAtAlk2TzYmGGZjE7nSI0899SNKihO4Ry-o0ir19oGs/preview"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "source": [
13 | "## Variables"
14 | ],
15 | "metadata": {}
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "source": [
21 | "nama = \"Ardhi\"\n",
22 | "jenis_kelamin = \"Pria\"\n",
23 | "umur = 35"
24 | ],
25 | "outputs": [],
26 | "metadata": {}
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "source": [
31 | "## Data Types - String"
32 | ],
33 | "metadata": {}
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "source": [
39 | "nama_barang = 'Laptop'\n",
40 | "tipe_barang = \"A51K\"\n",
41 | "alamat_pengiriman = \"Jl. Maju Mundur Ditempat No. 31, Jakarta\""
42 | ],
43 | "outputs": [],
44 | "metadata": {}
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "source": [
49 | "## Data Types - Integer"
50 | ],
51 | "metadata": {}
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "source": [
57 | "stock_barang = 20\n",
58 | "barang_terjual = 950\n",
59 | "jumlah_anak = 3"
60 | ],
61 | "outputs": [],
62 | "metadata": {}
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "source": [
67 | "## Data Types - Float"
68 | ],
69 | "metadata": {}
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 4,
74 | "source": [
75 | "berat_badan = 35.7\n",
76 | "tinggi_badan = 170.5\n",
77 | "diskon = 0.5"
78 | ],
79 | "outputs": [],
80 | "metadata": {}
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "source": [
85 | "## Data Types - Boolean"
86 | ],
87 | "metadata": {}
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "source": [
93 | "user_active = True\n",
94 | "married_status = False\n",
95 | "ready_to_ship = True"
96 | ],
97 | "outputs": [],
98 | "metadata": {}
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "source": [
103 | "## Data Types - List"
104 | ],
105 | "metadata": {}
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 6,
110 | "source": [
111 | "warna = [\"merah\", \"biru\", \"kuning\"]\n",
112 | "weight_history = [25.9, 28.1, 30.0, 32.4]\n",
113 | "random = [1, \"aku\", 34.8, '$', False]\n",
114 | "\n",
115 | "# get specific item\n",
116 | "print(warna[1])"
117 | ],
118 | "outputs": [
119 | {
120 | "output_type": "stream",
121 | "name": "stdout",
122 | "text": [
123 | "biru\n"
124 | ]
125 | }
126 | ],
127 | "metadata": {}
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "source": [
132 | "## Data Types - Dictionary"
133 | ],
134 | "metadata": {}
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 7,
139 | "source": [
140 | "identitas = {\"nama\": \"Ardhi\", \"umur\": 31, \"jenis_kelamin\": \"Pria\"}\n",
141 | "product_detail = {\"nama\": \"Asis ROG\", \"type\": \"A51K\", \"diskon\": 0.2, \"harga\": 250000.0}\n",
142 | "mawar = {\"tersedia\": True, \"warna\": [\"merah\", \"putih\"] }\n",
143 | "\n",
144 | "# get specific item\n",
145 | "identitas[\"umur\"]"
146 | ],
147 | "outputs": [
148 | {
149 | "output_type": "execute_result",
150 | "data": {
151 | "text/plain": [
152 | "31"
153 | ]
154 | },
155 | "metadata": {},
156 | "execution_count": 7
157 | }
158 | ],
159 | "metadata": {}
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "source": [
164 | "## Data Types - Tuple"
165 | ],
166 | "metadata": {}
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 8,
171 | "source": [
172 | "jenis_lagu = (\"pop\", \"rock\", \"dangdut\")\n",
173 | "penyanyi = (\"Awin Fals\", 33, \"Album Kenangan\")\n",
174 | "random = (22.3, \"gunting\", True, 0)\n",
175 | "\n",
176 | "# print(random)\n",
177 | "random[2] = \"ucup\"\n",
178 | "print(random)\n",
179 | "\n",
180 | "# get specific item\n",
181 | "# jenis_lagu[2]"
182 | ],
183 | "outputs": [
184 | {
185 | "output_type": "error",
186 | "ename": "TypeError",
187 | "evalue": "'tuple' object does not support item assignment",
188 | "traceback": [
189 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
190 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
191 | "Input \u001b[0;32mIn [8]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m random \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m22.3\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgunting\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# print(random)\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m random[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mucup\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(random)\n",
192 | "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment"
193 | ]
194 | }
195 | ],
196 | "metadata": {}
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "source": [
201 | "## Data Types - None"
202 | ],
203 | "metadata": {}
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "source": [
209 | "resep_obat = None\n",
210 | "mata_kuliah = None\n",
211 | "status_karyawan = None"
212 | ],
213 | "outputs": [
214 | {
215 | "output_type": "error",
216 | "ename": "SyntaxError",
217 | "evalue": "invalid syntax (318281694.py, line 1)",
218 | "traceback": [
219 | "\u001b[0;36m Input \u001b[0;32mIn [45]\u001b[0;36m\u001b[0m\n\u001b[0;31m resep_obat =\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
220 | ]
221 | }
222 | ],
223 | "metadata": {}
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "source": [
228 | "## Conditionals - If statement"
229 | ],
230 | "metadata": {}
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "source": [
236 | "nilai = 1\n",
237 | "if nilai == 1:\n",
238 | " # kode dibawah ini akan dieksekusi jika nilainya adalah 1\n",
239 | " print(\"ini adalah angka satu\") \n",
240 | "else:\n",
241 | " # kode dibawah ini akan dieksekusi jika nilainya bukan 1 \n",
242 | " print(\"ini bukanlah angka 1\")"
243 | ],
244 | "outputs": [
245 | {
246 | "output_type": "error",
247 | "ename": "IndentationError",
248 | "evalue": "expected an indented block (3315241015.py, line 4)",
249 | "traceback": [
250 | "\u001b[0;36m Input \u001b[0;32mIn [30]\u001b[0;36m\u001b[0m\n\u001b[0;31m print(\"ini adalah angka satu\")\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n"
251 | ]
252 | }
253 | ],
254 | "metadata": {}
255 | },
256 | {
257 | "cell_type": "markdown",
258 | "source": [
259 | "## Indentation"
260 | ],
261 | "metadata": {}
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "source": [
267 | "nama = \"Ardhi\"\n",
268 | "if nama == \"Ardhi\":\n",
269 | " print(\"Halo\")\n",
270 | " print(\"Nama saya \" + nama)"
271 | ],
272 | "outputs": [],
273 | "metadata": {}
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "source": [
278 | "## Comment"
279 | ],
280 | "metadata": {}
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "source": [
286 | "# kode dibawah ini adalah untuk menghitung 10 x 2\n",
287 | "perkalian = 10 * 2\n",
288 | "print(perkalian)\n",
289 | "\n",
290 | "'''\n",
291 | "Komentar ini dapat digunakan untuk multi baris\n",
292 | "sehingga memudahkan untuk memberikan komentar \n",
293 | "yang panjang\n",
294 | "'''\n",
295 | "\n",
296 | "# print(\"Halo\")\n",
297 | "print(\"Nama saya Ardhi\")"
298 | ],
299 | "outputs": [
300 | {
301 | "output_type": "stream",
302 | "name": "stdout",
303 | "text": [
304 | "20\n",
305 | "Nama saya Ardhi\n"
306 | ]
307 | }
308 | ],
309 | "metadata": {}
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "source": [
314 | "## Built-in Functions"
315 | ],
316 | "metadata": {}
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "source": [
322 | "# len : untuk menghitung jumlah panjangnya suatu value\n",
323 | "warna = [\"merah\", \"biru\", \"kuning\"]\n",
324 | "len(warna)\n",
325 | "\n",
326 | "# min : untuk mengambil nilai terkecil\n",
327 | "nilai_rapor = [22.0, 32.4, 37.3]\n",
328 | "min(nilai_rapor)\n",
329 | "\n",
330 | "# print : untuk menampilkan value \n",
331 | "print(\"Halo, saya Ardhi\")\n",
332 | "\n",
333 | "\n"
334 | ],
335 | "outputs": [
336 | {
337 | "output_type": "stream",
338 | "name": "stdout",
339 | "text": [
340 | "Halo, saya Ardhi\n"
341 | ]
342 | }
343 | ],
344 | "metadata": {}
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "source": [
349 | "## Type Cast"
350 | ],
351 | "metadata": {}
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "source": [
357 | "# integer\n",
358 | "harga = 1000\n",
359 | "\n",
360 | "# ubah menjadi float\n",
361 | "harga_float = float(harga)\n",
362 | "print(harga_float)\n",
363 | "print(type(harga_float))\n",
364 | "\n",
365 | "# ubah menjadi string\n",
366 | "harga_str = str(harga)\n",
367 | "print(harga_str)\n",
368 | "print(type(harga_str))"
369 | ],
370 | "outputs": [
371 | {
372 | "output_type": "stream",
373 | "name": "stdout",
374 | "text": [
375 | "1000.0\n",
376 | "<class 'float'>\n",
377 | "1000\n",
378 | "<class 'str'>\n"
379 | ]
380 | }
381 | ],
382 | "metadata": {}
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "source": [
387 | "## Exception"
388 | ],
389 | "metadata": {}
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "source": [
395 | "nilai = \"20.4\"\n",
396 | "try:\n",
397 | " hitung = 10 + nilai\n",
398 | " print(hitung)\n",
399 | "except Exception as e:\n",
400 | " print(e)"
401 | ],
402 | "outputs": [
403 | {
404 | "output_type": "stream",
405 | "name": "stdout",
406 | "text": [
407 | "unsupported operand type(s) for +: 'int' and 'str'\n"
408 | ]
409 | }
410 | ],
411 | "metadata": {}
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "source": [
416 | "## Function"
417 | ],
418 | "metadata": {}
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "source": [
424 | "def tampilkan_teks(input_text):\n",
425 | " '''\n",
426 | " input : input text (str)\n",
427 | " output: print()\n",
428 | " '''\n",
429 | " print(input_text)\n",
430 | "\n",
431 | "\n",
432 | "input_text = \"Halo, nama saya Ardhi\"\n",
433 | "tampilkan_teks(input_text)\n",
434 | "\n",
435 | "say_hi = \"Halo, nama saya udin\"\n",
436 | "tampilkan_teks(say_hi)\n"
437 | ],
438 | "outputs": [
439 | {
440 | "output_type": "stream",
441 | "name": "stdout",
442 | "text": [
443 | "Halo, nama saya Ardhi\n",
444 | "Halo, nama saya udin\n"
445 | ]
446 | }
447 | ],
448 | "metadata": {}
449 | },
450 | {
451 | "cell_type": "markdown",
452 | "source": [
453 | "## Import Packages"
454 | ],
455 | "metadata": {}
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": null,
460 | "source": [
461 | "import wget\n",
462 | "\n",
463 | "url = 'https://sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv'\n",
464 | "download = wget.download(url)"
465 | ],
466 | "outputs": [],
467 | "metadata": {}
468 | },
469 | {
470 | "cell_type": "markdown",
471 | "source": [
472 | "## Loop"
473 | ],
474 | "metadata": {}
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": null,
479 | "source": [
480 | "# While loop akan dilakukan selama kondisinya memenuhi (True)\n",
481 | "count = 1\n",
482 | "while (count < 4):\n",
483 | " print (\"The count is: \", count)\n",
484 | " count = count + 1\n",
485 | "\n",
486 | "print (\"selesai!\")"
487 | ],
488 | "outputs": [
489 | {
490 | "output_type": "stream",
491 | "name": "stdout",
492 | "text": [
493 | "The count is: 1\n",
494 | "The count is: 2\n",
495 | "The count is: 3\n",
496 | "selesai!\n"
497 | ]
498 | }
499 | ],
500 | "metadata": {}
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": null,
505 | "source": [
506 | "# for loop digunakan untuk mengulang setiap item dalam urutan apa pun\n",
507 | "angka = [1,2,3,4,5]\n",
508 | "for x in angka:\n",
509 | " print(x)"
510 | ],
511 | "outputs": [
512 | {
513 | "output_type": "stream",
514 | "name": "stdout",
515 | "text": [
516 | "1\n",
517 | "2\n",
518 | "3\n",
519 | "4\n",
520 | "5\n"
521 | ]
522 | }
523 | ],
524 | "metadata": {}
525 | },
526 | {
527 | "cell_type": "code",
528 | "execution_count": null,
529 | "source": [
530 | "nama_barang = \"sepatu\"\n",
531 | "\n",
532 | "NAMA_barang = \"mobil\"\n",
533 | "\n",
534 | "print(nama_barang)"
535 | ],
536 | "outputs": [
537 | {
538 | "output_type": "stream",
539 | "name": "stdout",
540 | "text": [
541 | "sepatu\n"
542 | ]
543 | }
544 | ],
545 | "metadata": {}
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": null,
550 | "source": [
551 | "nilai = 10\n",
552 | "nilai_tambahan = 10\n",
553 | "\n",
554 | "hasil = nilai + nilai_tambahan\n",
555 | "print(hasil)"
556 | ],
557 | "outputs": [
558 | {
559 | "output_type": "stream",
560 | "name": "stdout",
561 | "text": [
562 | "20\n"
563 | ]
564 | }
565 | ],
566 | "metadata": {}
567 | }
568 | ],
569 | "metadata": {
570 | "orig_nbformat": 4,
571 | "language_info": {
572 | "name": "python",
573 | "version": "3.8.10",
574 | "mimetype": "text/x-python",
575 | "codemirror_mode": {
576 | "name": "ipython",
577 | "version": 3
578 | },
579 | "pygments_lexer": "ipython3",
580 | "nbconvert_exporter": "python",
581 | "file_extension": ".py"
582 | },
583 | "kernelspec": {
584 | "name": "python3",
585 | "display_name": "Python 3.8.10 64-bit"
586 | },
587 | "interpreter": {
588 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
589 | }
590 | },
591 | "nbformat": 4,
592 | "nbformat_minor": 2
593 | }
--------------------------------------------------------------------------------
/Python Programming/data-pipeline-breakdown.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "## Example Data Engineer Task\n",
7 | "### - Implementasi ETL dalam kode Python\n",
8 | "### - Extract CSV File -> Transformasi (Data Cleaning) -> Load ke folder data-warehouse"
9 | ],
10 | "metadata": {}
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "source": [
16 | "# import library\n",
17 | "import pandas as pd"
18 | ],
19 | "outputs": [],
20 | "metadata": {}
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "source": [
26 | "# read data\n",
27 | "df = pd.read_csv(\"sources/sales_data.csv\")"
28 | ],
29 | "outputs": [],
30 | "metadata": {}
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 3,
35 | "source": [
36 | "# show top 5 data\n",
37 | "df.head()"
38 | ],
39 | "outputs": [
40 | {
41 | "output_type": "execute_result",
42 | "data": {
43 | "text/plain": [
44 | " country type channel priority date \\\n",
45 | "0 Tuvalu Baby Food Offline H 2010-05-28 \n",
46 | "1 Grenada Cereal Online C 2012-08-22 \n",
47 | "2 Russia NaN Offline L 2014-05-02 \n",
48 | "3 Sao Tome and Principe Fruits Online C 2014-06-20 \n",
49 | "4 Rwanda Office Supplies Offline L NaN \n",
50 | "\n",
51 | " id units_sold unit_price \n",
52 | "0 669165933 9925.0 255.28 \n",
53 | "1 963881480 NaN 205.70 \n",
54 | "2 341417157 1779.0 651.21 \n",
55 | "3 514321792 8102.0 9.33 \n",
56 | "4 115456712 5062.0 651.21 "
57 | ],
58 | "text/html": [
59 | "\n",
60 | "\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " | \n",
77 | " country | \n",
78 | " type | \n",
79 | " channel | \n",
80 | " priority | \n",
81 | " date | \n",
82 | " id | \n",
83 | " units_sold | \n",
84 | " unit_price | \n",
85 | " \n",
86 | " \n",
87 | " \n",
88 | " \n",
89 | " | 0 | \n",
90 | " Tuvalu | \n",
91 | " Baby Food | \n",
92 | " Offline | \n",
93 | " H | \n",
94 | " 2010-05-28 | \n",
95 | " 669165933 | \n",
96 | " 9925.0 | \n",
97 | " 255.28 | \n",
98 | " \n",
99 | " \n",
100 | " | 1 | \n",
101 | " Grenada | \n",
102 | " Cereal | \n",
103 | " Online | \n",
104 | " C | \n",
105 | " 2012-08-22 | \n",
106 | " 963881480 | \n",
107 | " NaN | \n",
108 | " 205.70 | \n",
109 | " \n",
110 | " \n",
111 | " | 2 | \n",
112 | " Russia | \n",
113 | " NaN | \n",
114 | " Offline | \n",
115 | " L | \n",
116 | " 2014-05-02 | \n",
117 | " 341417157 | \n",
118 | " 1779.0 | \n",
119 | " 651.21 | \n",
120 | " \n",
121 | " \n",
122 | " | 3 | \n",
123 | " Sao Tome and Principe | \n",
124 | " Fruits | \n",
125 | " Online | \n",
126 | " C | \n",
127 | " 2014-06-20 | \n",
128 | " 514321792 | \n",
129 | " 8102.0 | \n",
130 | " 9.33 | \n",
131 | " \n",
132 | " \n",
133 | " | 4 | \n",
134 | " Rwanda | \n",
135 | " Office Supplies | \n",
136 | " Offline | \n",
137 | " L | \n",
138 | " NaN | \n",
139 | " 115456712 | \n",
140 | " 5062.0 | \n",
141 | " 651.21 | \n",
142 | " \n",
143 | " \n",
144 | " \n",
145 | " "
146 | ]
147 | },
148 | "metadata": {},
149 | "execution_count": 3
150 | }
151 | ],
152 | "metadata": {}
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 4,
157 | "source": [
158 | "# filter by type\n",
159 | "df[df[\"type\"]==\"Personal Care\"]"
160 | ],
161 | "outputs": [
162 | {
163 | "output_type": "execute_result",
164 | "data": {
165 | "text/plain": [
166 | " country type channel priority date \\\n",
167 | "8 Republic of the Congo Personal Care Offline M 2015-07-14 \n",
168 | "14 Mongolia Personal Care Offline C 2014-02-19 \n",
169 | "24 Moldova Personal Care Online L 2016-05-07 \n",
170 | "31 South Sudan Personal Care Offline C 2013-12-29 \n",
171 | "35 Costa Rica Personal Care Offline L 2017-05-08 \n",
172 | "40 Niger Personal Care Online H 2017-03-11 \n",
173 | "47 Switzerland Personal Care Online M 2010-12-23 \n",
174 | "66 Gabon Personal Care Offline L 2012-07-08 \n",
175 | "85 Mexico Personal Care Offline L 2012-02-17 \n",
176 | "98 Mexico Personal Care Offline M 2015-07-30 \n",
177 | "101 Mexico Personal Care Offline M 2015-07-30 \n",
178 | "\n",
179 | " id units_sold unit_price \n",
180 | "8 770463311 6070.0 81.73 \n",
181 | "14 832401311 4901.0 81.73 \n",
182 | "24 740147912 5070.0 81.73 \n",
183 | "31 406502997 2125.0 81.73 \n",
184 | "35 456767165 6409.0 81.73 \n",
185 | "40 699285638 3015.0 81.73 \n",
186 | "47 617667090 273.0 81.73 \n",
187 | "66 228944623 8656.0 81.73 \n",
188 | "85 430915820 6422.0 81.73 \n",
189 | "98 559427106 5767.0 81.73 \n",
190 | "101 559427106 5767.0 81.73 "
191 | ],
192 | "text/html": [
193 | "\n",
194 | "\n",
207 | " \n",
208 | " \n",
209 | " \n",
210 | " | \n",
211 | " country | \n",
212 | " type | \n",
213 | " channel | \n",
214 | " priority | \n",
215 | " date | \n",
216 | " id | \n",
217 | " units_sold | \n",
218 | " unit_price | \n",
219 | " \n",
220 | " \n",
221 | " \n",
222 | " \n",
223 | " | 8 | \n",
224 | " Republic of the Congo | \n",
225 | " Personal Care | \n",
226 | " Offline | \n",
227 | " M | \n",
228 | " 2015-07-14 | \n",
229 | " 770463311 | \n",
230 | " 6070.0 | \n",
231 | " 81.73 | \n",
232 | " \n",
233 | " \n",
234 | " | 14 | \n",
235 | " Mongolia | \n",
236 | " Personal Care | \n",
237 | " Offline | \n",
238 | " C | \n",
239 | " 2014-02-19 | \n",
240 | " 832401311 | \n",
241 | " 4901.0 | \n",
242 | " 81.73 | \n",
243 | " \n",
244 | " \n",
245 | " | 24 | \n",
246 | " Moldova | \n",
247 | " Personal Care | \n",
248 | " Online | \n",
249 | " L | \n",
250 | " 2016-05-07 | \n",
251 | " 740147912 | \n",
252 | " 5070.0 | \n",
253 | " 81.73 | \n",
254 | " \n",
255 | " \n",
256 | " | 31 | \n",
257 | " South Sudan | \n",
258 | " Personal Care | \n",
259 | " Offline | \n",
260 | " C | \n",
261 | " 2013-12-29 | \n",
262 | " 406502997 | \n",
263 | " 2125.0 | \n",
264 | " 81.73 | \n",
265 | " \n",
266 | " \n",
267 | " | 35 | \n",
268 | " Costa Rica | \n",
269 | " Personal Care | \n",
270 | " Offline | \n",
271 | " L | \n",
272 | " 2017-05-08 | \n",
273 | " 456767165 | \n",
274 | " 6409.0 | \n",
275 | " 81.73 | \n",
276 | " \n",
277 | " \n",
278 | " | 40 | \n",
279 | " Niger | \n",
280 | " Personal Care | \n",
281 | " Online | \n",
282 | " H | \n",
283 | " 2017-03-11 | \n",
284 | " 699285638 | \n",
285 | " 3015.0 | \n",
286 | " 81.73 | \n",
287 | " \n",
288 | " \n",
289 | " | 47 | \n",
290 | " Switzerland | \n",
291 | " Personal Care | \n",
292 | " Online | \n",
293 | " M | \n",
294 | " 2010-12-23 | \n",
295 | " 617667090 | \n",
296 | " 273.0 | \n",
297 | " 81.73 | \n",
298 | " \n",
299 | " \n",
300 | " | 66 | \n",
301 | " Gabon | \n",
302 | " Personal Care | \n",
303 | " Offline | \n",
304 | " L | \n",
305 | " 2012-07-08 | \n",
306 | " 228944623 | \n",
307 | " 8656.0 | \n",
308 | " 81.73 | \n",
309 | " \n",
310 | " \n",
311 | " | 85 | \n",
312 | " Mexico | \n",
313 | " Personal Care | \n",
314 | " Offline | \n",
315 | " L | \n",
316 | " 2012-02-17 | \n",
317 | " 430915820 | \n",
318 | " 6422.0 | \n",
319 | " 81.73 | \n",
320 | " \n",
321 | " \n",
322 | " | 98 | \n",
323 | " Mexico | \n",
324 | " Personal Care | \n",
325 | " Offline | \n",
326 | " M | \n",
327 | " 2015-07-30 | \n",
328 | " 559427106 | \n",
329 | " 5767.0 | \n",
330 | " 81.73 | \n",
331 | " \n",
332 | " \n",
333 | " | 101 | \n",
334 | " Mexico | \n",
335 | " Personal Care | \n",
336 | " Offline | \n",
337 | " M | \n",
338 | " 2015-07-30 | \n",
339 | " 559427106 | \n",
340 | " 5767.0 | \n",
341 | " 81.73 | \n",
342 | " \n",
343 | " \n",
344 | " \n",
345 | " "
346 | ]
347 | },
348 | "metadata": {},
349 | "execution_count": 4
350 | }
351 | ],
352 | "metadata": {}
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 5,
357 | "source": [
358 | "# show missing value\n",
359 | "df[df.isna().any(axis=1)]"
360 | ],
361 | "outputs": [
362 | {
363 | "output_type": "execute_result",
364 | "data": {
365 | "text/plain": [
366 | " country type channel priority date id \\\n",
367 | "1 Grenada Cereal Online C 2012-08-22 963881480 \n",
368 | "2 Russia NaN Offline L 2014-05-02 341417157 \n",
369 | "4 Rwanda Office Supplies Offline L NaN 115456712 \n",
370 | "5 NaN Baby Food Online C 2015-02-04 547995746 \n",
371 | "\n",
372 | " units_sold unit_price \n",
373 | "1 NaN 205.70 \n",
374 | "2 1779.0 651.21 \n",
375 | "4 5062.0 651.21 \n",
376 | "5 2974.0 255.28 "
377 | ],
378 | "text/html": [
379 | "\n",
380 | "\n",
393 | " \n",
394 | " \n",
395 | " \n",
396 | " | \n",
397 | " country | \n",
398 | " type | \n",
399 | " channel | \n",
400 | " priority | \n",
401 | " date | \n",
402 | " id | \n",
403 | " units_sold | \n",
404 | " unit_price | \n",
405 | " \n",
406 | " \n",
407 | " \n",
408 | " \n",
409 | " | 1 | \n",
410 | " Grenada | \n",
411 | " Cereal | \n",
412 | " Online | \n",
413 | " C | \n",
414 | " 2012-08-22 | \n",
415 | " 963881480 | \n",
416 | " NaN | \n",
417 | " 205.70 | \n",
418 | " \n",
419 | " \n",
420 | " | 2 | \n",
421 | " Russia | \n",
422 | " NaN | \n",
423 | " Offline | \n",
424 | " L | \n",
425 | " 2014-05-02 | \n",
426 | " 341417157 | \n",
427 | " 1779.0 | \n",
428 | " 651.21 | \n",
429 | " \n",
430 | " \n",
431 | " | 4 | \n",
432 | " Rwanda | \n",
433 | " Office Supplies | \n",
434 | " Offline | \n",
435 | " L | \n",
436 | " NaN | \n",
437 | " 115456712 | \n",
438 | " 5062.0 | \n",
439 | " 651.21 | \n",
440 | " \n",
441 | " \n",
442 | " | 5 | \n",
443 | " NaN | \n",
444 | " Baby Food | \n",
445 | " Online | \n",
446 | " C | \n",
447 | " 2015-02-04 | \n",
448 | " 547995746 | \n",
449 | " 2974.0 | \n",
450 | " 255.28 | \n",
451 | " \n",
452 | " \n",
453 | " \n",
454 | " "
455 | ]
456 | },
457 | "metadata": {},
458 | "execution_count": 5
459 | }
460 | ],
461 | "metadata": {}
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 6,
466 | "source": [
467 | "# drop missing value\n",
468 | "df = df.dropna()\n",
469 | "\n",
470 | "# show missing value after drop \n",
471 | "df[df.isna().any(axis=1)]"
472 | ],
473 | "outputs": [
474 | {
475 | "output_type": "execute_result",
476 | "data": {
477 | "text/plain": [
478 | "Empty DataFrame\n",
479 | "Columns: [country, type, channel, priority, date, id, units_sold, unit_price]\n",
480 | "Index: []"
481 | ],
482 | "text/html": [
483 | "\n",
484 | "\n",
497 | " \n",
498 | " \n",
499 | " \n",
500 | " | \n",
501 | " country | \n",
502 | " type | \n",
503 | " channel | \n",
504 | " priority | \n",
505 | " date | \n",
506 | " id | \n",
507 | " units_sold | \n",
508 | " unit_price | \n",
509 | " \n",
510 | " \n",
511 | " \n",
512 | " \n",
513 | " \n",
514 | " "
515 | ]
516 | },
517 | "metadata": {},
518 | "execution_count": 6
519 | }
520 | ],
521 | "metadata": {}
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 7,
526 | "source": [
527 | "# check unique values of column type\n",
528 | "df[\"type\"].unique()"
529 | ],
530 | "outputs": [
531 | {
532 | "output_type": "execute_result",
533 | "data": {
534 | "text/plain": [
535 | "array(['Baby Food', 'Fruits', 'Household', 'Vegetables', 'Personal Care',\n",
536 | " 'Cereal', 'Clothes', 'Cosmetics', 'Beverages', 'Meat', 'Snacks',\n",
537 | " 'Frutis', 'Office Supplies'], dtype=object)"
538 | ]
539 | },
540 | "metadata": {},
541 | "execution_count": 7
542 | }
543 | ],
544 | "metadata": {}
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": 8,
549 | "source": [
550 | "# replace Frutis to Fruits (typo)\n",
551 | "df.loc[df[\"type\"]==\"Frutis\", \"type\"] = \"Fruits\"\n",
552 | "\n",
553 | "# check unique values of column type\n",
554 | "df[\"type\"].unique()"
555 | ],
556 | "outputs": [
557 | {
558 | "output_type": "execute_result",
559 | "data": {
560 | "text/plain": [
561 | "array(['Baby Food', 'Fruits', 'Household', 'Vegetables', 'Personal Care',\n",
562 | " 'Cereal', 'Clothes', 'Cosmetics', 'Beverages', 'Meat', 'Snacks',\n",
563 | " 'Office Supplies'], dtype=object)"
564 | ]
565 | },
566 | "metadata": {},
567 | "execution_count": 8
568 | }
569 | ],
570 | "metadata": {}
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": 9,
575 | "source": [
576 | "# show duplicate data\n",
577 | "df[df.duplicated()]"
578 | ],
579 | "outputs": [
580 | {
581 | "output_type": "execute_result",
582 | "data": {
583 | "text/plain": [
584 | " country type channel priority date id \\\n",
585 | "101 Mexico Personal Care Offline M 2015-07-30 559427106 \n",
586 | "\n",
587 | " units_sold unit_price \n",
588 | "101 5767.0 81.73 "
589 | ],
590 | "text/html": [
591 | "\n",
592 | "\n",
605 | " \n",
606 | " \n",
607 | " \n",
608 | " | \n",
609 | " country | \n",
610 | " type | \n",
611 | " channel | \n",
612 | " priority | \n",
613 | " date | \n",
614 | " id | \n",
615 | " units_sold | \n",
616 | " unit_price | \n",
617 | " \n",
618 | " \n",
619 | " \n",
620 | " \n",
621 | " | 101 | \n",
622 | " Mexico | \n",
623 | " Personal Care | \n",
624 | " Offline | \n",
625 | " M | \n",
626 | " 2015-07-30 | \n",
627 | " 559427106 | \n",
628 | " 5767.0 | \n",
629 | " 81.73 | \n",
630 | " \n",
631 | " \n",
632 | " \n",
633 | " "
634 | ]
635 | },
636 | "metadata": {},
637 | "execution_count": 9
638 | }
639 | ],
640 | "metadata": {}
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": 10,
645 | "source": [
646 | "# check record yang duplicate\n",
647 | "df[df[\"id\"]==559427106]"
648 | ],
649 | "outputs": [
650 | {
651 | "output_type": "execute_result",
652 | "data": {
653 | "text/plain": [
654 | " country type channel priority date id \\\n",
655 | "98 Mexico Personal Care Offline M 2015-07-30 559427106 \n",
656 | "101 Mexico Personal Care Offline M 2015-07-30 559427106 \n",
657 | "\n",
658 | " units_sold unit_price \n",
659 | "98 5767.0 81.73 \n",
660 | "101 5767.0 81.73 "
661 | ],
662 | "text/html": [
663 | "\n",
664 | "\n",
677 | " \n",
678 | " \n",
679 | " \n",
680 | " | \n",
681 | " country | \n",
682 | " type | \n",
683 | " channel | \n",
684 | " priority | \n",
685 | " date | \n",
686 | " id | \n",
687 | " units_sold | \n",
688 | " unit_price | \n",
689 | " \n",
690 | " \n",
691 | " \n",
692 | " \n",
693 | " | 98 | \n",
694 | " Mexico | \n",
695 | " Personal Care | \n",
696 | " Offline | \n",
697 | " M | \n",
698 | " 2015-07-30 | \n",
699 | " 559427106 | \n",
700 | " 5767.0 | \n",
701 | " 81.73 | \n",
702 | " \n",
703 | " \n",
704 | " | 101 | \n",
705 | " Mexico | \n",
706 | " Personal Care | \n",
707 | " Offline | \n",
708 | " M | \n",
709 | " 2015-07-30 | \n",
710 | " 559427106 | \n",
711 | " 5767.0 | \n",
712 | " 81.73 | \n",
713 | " \n",
714 | " \n",
715 | " \n",
716 | " "
717 | ]
718 | },
719 | "metadata": {},
720 | "execution_count": 10
721 | }
722 | ],
723 | "metadata": {}
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 11,
728 | "source": [
729 | "# drop duplicate\n",
730 | "df = df.drop_duplicates()\n",
731 | "\n",
732 | "# show duplicate data again after drop\n",
733 | "df[df.duplicated()]"
734 | ],
735 | "outputs": [
736 | {
737 | "output_type": "execute_result",
738 | "data": {
739 | "text/plain": [
740 | "Empty DataFrame\n",
741 | "Columns: [country, type, channel, priority, date, id, units_sold, unit_price]\n",
742 | "Index: []"
743 | ],
744 | "text/html": [
745 | "\n",
746 | "\n",
759 | " \n",
760 | " \n",
761 | " \n",
762 | " | \n",
763 | " country | \n",
764 | " type | \n",
765 | " channel | \n",
766 | " priority | \n",
767 | " date | \n",
768 | " id | \n",
769 | " units_sold | \n",
770 | " unit_price | \n",
771 | " \n",
772 | " \n",
773 | " \n",
774 | " \n",
775 | " \n",
776 | " "
777 | ]
778 | },
779 | "metadata": {},
780 | "execution_count": 11
781 | }
782 | ],
783 | "metadata": {}
784 | },
785 | {
786 | "cell_type": "code",
787 | "execution_count": 12,
788 | "source": [
789 | "# export hasil cleaning\n",
790 | "df.to_csv(\"data-warehouse/sales_data_cleaned.csv\", index=False)"
791 | ],
792 | "outputs": [],
793 | "metadata": {}
794 | }
795 | ],
796 | "metadata": {
797 | "orig_nbformat": 4,
798 | "language_info": {
799 | "name": "python",
800 | "version": "3.8.10",
801 | "mimetype": "text/x-python",
802 | "codemirror_mode": {
803 | "name": "ipython",
804 | "version": 3
805 | },
806 | "pygments_lexer": "ipython3",
807 | "nbconvert_exporter": "python",
808 | "file_extension": ".py"
809 | },
810 | "kernelspec": {
811 | "name": "python3",
812 | "display_name": "Python 3.8.10 64-bit"
813 | },
814 | "interpreter": {
815 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
816 | }
817 | },
818 | "nbformat": 4,
819 | "nbformat_minor": 2
820 | }
--------------------------------------------------------------------------------
|