├── .gitignore ├── Exercises ├── Exercise-1 │ ├── Dockerfile │ ├── README.md │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-10 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ └── 202306-divvy-tripdata.csv │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-2 │ ├── Dockerfile │ ├── README.md │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-3 │ ├── Dockerfile │ ├── README.md │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-4 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ ├── enough_already │ │ │ └── file-4.json │ │ ├── file-1.json │ │ ├── other_folder │ │ │ └── file-3.json │ │ └── some_folder │ │ │ ├── other_folder │ │ │ └── file-2.json │ │ │ └── test.csv │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-5 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ ├── accounts.csv │ │ ├── products.csv │ │ └── transactions.csv │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-6 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ ├── Divvy_Trips_2019_Q4.zip │ │ └── Divvy_Trips_2020_Q1.zip │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-7 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ └── hard-drive-2022-01-01-failures.csv.zip │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt ├── Exercise-8 │ ├── Dockerfile │ ├── README.md │ ├── data │ │ └── Electric_Vehicle_Population_Data.csv │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt └── Exercise-9 │ ├── Dockerfile │ ├── README.md │ ├── data │ └── 202306-divvy-tripdata.csv │ ├── docker-compose.yml │ ├── main.py │ └── requirements.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea* 2 | *.DS_Store* 3 | *postgres-data* -------------------------------------------------------------------------------- /Exercises/Exercise-1/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-1/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-1/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-1/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-1/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-1/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-1/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-1/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.27.1 -------------------------------------------------------------------------------- /Exercises/Exercise-10/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-10/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-10/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-10/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-10/data/202306-divvy-tripdata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-10/data/202306-divvy-tripdata.csv -------------------------------------------------------------------------------- /Exercises/Exercise-10/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-10/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-10/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-10/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-10/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | great-expectations -------------------------------------------------------------------------------- /Exercises/Exercise-2/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-2/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-2/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-2/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-2/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-2/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-2/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-2/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-2/requirements.txt -------------------------------------------------------------------------------- /Exercises/Exercise-3/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-3/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-3/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-3/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-3/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-3/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-3/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-3/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.21.2 -------------------------------------------------------------------------------- /Exercises/Exercise-4/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-4/data/enough_already/file-4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/data/enough_already/file-4.json -------------------------------------------------------------------------------- /Exercises/Exercise-4/data/file-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/data/file-1.json -------------------------------------------------------------------------------- /Exercises/Exercise-4/data/other_folder/file-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/data/other_folder/file-3.json -------------------------------------------------------------------------------- /Exercises/Exercise-4/data/some_folder/other_folder/file-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/data/some_folder/other_folder/file-2.json -------------------------------------------------------------------------------- /Exercises/Exercise-4/data/some_folder/test.csv: -------------------------------------------------------------------------------- 1 | not a json file | data point 2 | 1, 2 3 | 4, 5 4 | 5, 7 -------------------------------------------------------------------------------- /Exercises/Exercise-4/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-4/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-4/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-4/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.27.1 -------------------------------------------------------------------------------- /Exercises/Exercise-5/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-5/data/accounts.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/data/accounts.csv -------------------------------------------------------------------------------- /Exercises/Exercise-5/data/products.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/data/products.csv -------------------------------------------------------------------------------- /Exercises/Exercise-5/data/transactions.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/data/transactions.csv -------------------------------------------------------------------------------- /Exercises/Exercise-5/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-5/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-5/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-5/requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2 -------------------------------------------------------------------------------- /Exercises/Exercise-6/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-6/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-6/data/Divvy_Trips_2019_Q4.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/data/Divvy_Trips_2019_Q4.zip -------------------------------------------------------------------------------- /Exercises/Exercise-6/data/Divvy_Trips_2020_Q1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/data/Divvy_Trips_2020_Q1.zip -------------------------------------------------------------------------------- /Exercises/Exercise-6/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-6/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-6/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-6/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pyspark -------------------------------------------------------------------------------- /Exercises/Exercise-7/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-7/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-7/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-7/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-7/data/hard-drive-2022-01-01-failures.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-7/data/hard-drive-2022-01-01-failures.csv.zip -------------------------------------------------------------------------------- /Exercises/Exercise-7/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-7/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-7/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-7/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-7/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pyspark -------------------------------------------------------------------------------- /Exercises/Exercise-8/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-8/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-8/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-8/data/Electric_Vehicle_Population_Data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-8/data/Electric_Vehicle_Population_Data.csv -------------------------------------------------------------------------------- /Exercises/Exercise-8/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-8/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-8/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-8/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-8/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | duckdb -------------------------------------------------------------------------------- /Exercises/Exercise-9/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-9/Dockerfile -------------------------------------------------------------------------------- /Exercises/Exercise-9/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-9/README.md -------------------------------------------------------------------------------- /Exercises/Exercise-9/data/202306-divvy-tripdata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-9/data/202306-divvy-tripdata.csv -------------------------------------------------------------------------------- /Exercises/Exercise-9/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-9/docker-compose.yml -------------------------------------------------------------------------------- /Exercises/Exercise-9/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/Exercises/Exercise-9/main.py -------------------------------------------------------------------------------- /Exercises/Exercise-9/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | polars -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/data-engineering-practice/HEAD/README.md --------------------------------------------------------------------------------