├── .gitignore ├── 1-data-modeling ├── L1_Exercise_1_Creating_a_Table_with_Postgres.ipynb ├── L1_Exercise_2_Creating_a_Table_with_Apache_Cassandra.ipynb ├── L2_Exercise_1_Creating_Normalized_Tables.ipynb ├── L2_Exercise_2_Creating_Denormalized_Tables.ipynb ├── L2_Exercise_3_Creating_Fact_and_Dimension_Tables_with_Star_Schema.ipynb ├── L3-Project_Data_Modeling_with_Postgres │ ├── .gitignore │ ├── README.md │ ├── create_tables.py │ ├── etl.ipynb │ ├── etl.py │ ├── sql_queries.py │ └── test.ipynb ├── L4-demo-1-2-queries-2-tables.ipynb ├── L4-demo-2-primary-key.ipynb ├── L4-demo-3-clustering-column.ipynb ├── L4-demo-4-using-the-where-clause.ipynb ├── L4_Exercise_1_Three_Queries_Three_Tables.ipynb ├── L4_Exercise_2_Primary_Key.ipynb ├── L4_Exercise_3_Clustering_Column.ipynb ├── L4_Exercise_4_Using_the_WHERE_Clause.ipynb └── L5-Project_Data_Modeling_with_Apache_Cassandra │ ├── .gitignore │ └── Project_1B_Project_Template.ipynb ├── 2-cloud-data-warehouses ├── L1_E1_-_Step_1_&_2.ipynb ├── L1_E1_-_Step_3.ipynb ├── L1_E1_-_Step_4.ipynb ├── L1_E1_-_Step_5.ipynb ├── L1_E1_-_Step_6.ipynb ├── L1_E2_-_1_-_Slicing_and_Dicing.ipynb ├── L1_E2_-_2_-_Roll_up_and_Drill_Down.ipynb ├── L1_E2_-_3_-_Grouping_Sets.ipynb ├── L1_E2_-_4_-_CUBE.ipynb ├── L1_E3_-_Columnar_Vs_Row_Storage.ipynb ├── L3_Exercise_2_-_IaC.ipynb ├── L3_Exercise_3_-_Parallel_ETL.ipynb ├── L3_Exercise_4_-_Table_Design.ipynb └── L4_Project_-_Data_Warehouse │ ├── .gitignore │ ├── README.md │ ├── analyze.py │ ├── aws_check_cluster_available.py │ ├── aws_create_cluster.py │ ├── aws_destroy_cluster.py │ ├── create_tables.py │ ├── data-warehouse-project-der-diagram.png │ ├── dwh.cfg.example │ ├── etl.py │ └── sql_queries.py ├── 3-data-lakes-with-spark ├── 10_L4_Exercise_2_-_Advanced_Analytics_NLP.ipynb ├── 11_L4_Exercise_3_-_Data_Lake_on_S3.ipynb ├── 1_procedural_vs_functional_in_python.ipynb ├── 2_spark_maps_and_lazy_evaluation.ipynb ├── 3_data_inputs_and_outputs.ipynb ├── 4_data_wrangling.ipynb ├── 5_dataframe_quiz.ipynb ├── 7_data_wrangling-sql.ipynb ├── 8_spark_sql_quiz.ipynb ├── 9_L4_Exercise_1_-_Schema_On_Read.ipynb ├── L4_Project │ ├── .gitignore │ ├── README.md │ ├── dl.cfg.example │ └── etl.py ├── data │ └── sparkify_log_small.json └── mapreduce_practice.ipynb ├── 4-data-pipelines-with-airflow ├── L1_exercises │ ├── exercise1.py │ ├── exercise2.py │ ├── exercise3.py │ ├── exercise4.py │ ├── exercise5.py │ ├── exercise6.py │ └── sql_statements.py ├── L2_exercises │ ├── exercise1.py │ ├── exercise2.py │ ├── exercise3.py │ ├── exercise4.py │ └── sql_statements.py ├── L3_exercises │ ├── exercise1.py │ ├── exercise2.py │ ├── exercise3 │ │ ├── dag.py │ │ └── subdag.py │ ├── exercise4.py │ ├── operators │ │ ├── __init__.py │ │ ├── facts_calculator.py │ │ ├── has_rows.py │ │ └── s3_to_redshift.py │ └── sql_statements.py └── L4_project │ ├── README.md │ ├── create_tables.sql │ ├── dags │ └── sparkify_analytical_tables_dag.py │ ├── images │ └── dag.png │ └── plugins │ ├── __init__.py │ ├── helpers │ ├── __init__.py │ └── sql_queries.py │ └── operators │ ├── __init__.py │ ├── data_quality.py │ ├── load_dimension.py │ ├── load_fact.py │ └── stage_redshift.py ├── 5-capstone-project ├── README.md └── datasets-exploration.ipynb ├── README.md └── explorations └── nyc-taxi-challenge.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/.gitignore -------------------------------------------------------------------------------- /1-data-modeling/L1_Exercise_1_Creating_a_Table_with_Postgres.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L1_Exercise_1_Creating_a_Table_with_Postgres.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L1_Exercise_2_Creating_a_Table_with_Apache_Cassandra.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L1_Exercise_2_Creating_a_Table_with_Apache_Cassandra.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L2_Exercise_1_Creating_Normalized_Tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L2_Exercise_1_Creating_Normalized_Tables.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L2_Exercise_2_Creating_Denormalized_Tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L2_Exercise_2_Creating_Denormalized_Tables.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L2_Exercise_3_Creating_Fact_and_Dimension_Tables_with_Star_Schema.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L2_Exercise_3_Creating_Fact_and_Dimension_Tables_with_Star_Schema.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | .ipynb_checkpoints 3 | __pycache__ 4 | -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/README.md -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/create_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/create_tables.py -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/etl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/etl.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/etl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/etl.py -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/sql_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/sql_queries.py -------------------------------------------------------------------------------- /1-data-modeling/L3-Project_Data_Modeling_with_Postgres/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L3-Project_Data_Modeling_with_Postgres/test.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4-demo-1-2-queries-2-tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4-demo-1-2-queries-2-tables.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4-demo-2-primary-key.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4-demo-2-primary-key.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4-demo-3-clustering-column.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4-demo-3-clustering-column.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4-demo-4-using-the-where-clause.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4-demo-4-using-the-where-clause.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4_Exercise_1_Three_Queries_Three_Tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4_Exercise_1_Three_Queries_Three_Tables.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4_Exercise_2_Primary_Key.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4_Exercise_2_Primary_Key.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4_Exercise_3_Clustering_Column.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4_Exercise_3_Clustering_Column.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L4_Exercise_4_Using_the_WHERE_Clause.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L4_Exercise_4_Using_the_WHERE_Clause.ipynb -------------------------------------------------------------------------------- /1-data-modeling/L5-Project_Data_Modeling_with_Apache_Cassandra/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | event_datafile_new.csv 3 | -------------------------------------------------------------------------------- /1-data-modeling/L5-Project_Data_Modeling_with_Apache_Cassandra/Project_1B_Project_Template.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/1-data-modeling/L5-Project_Data_Modeling_with_Apache_Cassandra/Project_1B_Project_Template.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E1_-_Step_1_&_2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E1_-_Step_1_&_2.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E1_-_Step_3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E1_-_Step_3.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E1_-_Step_4.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E1_-_Step_4.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E1_-_Step_5.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E1_-_Step_5.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E1_-_Step_6.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E1_-_Step_6.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E2_-_1_-_Slicing_and_Dicing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E2_-_1_-_Slicing_and_Dicing.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E2_-_2_-_Roll_up_and_Drill_Down.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E2_-_2_-_Roll_up_and_Drill_Down.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E2_-_3_-_Grouping_Sets.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E2_-_3_-_Grouping_Sets.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E2_-_4_-_CUBE.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E2_-_4_-_CUBE.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L1_E3_-_Columnar_Vs_Row_Storage.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L1_E3_-_Columnar_Vs_Row_Storage.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L3_Exercise_2_-_IaC.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L3_Exercise_2_-_IaC.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L3_Exercise_3_-_Parallel_ETL.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L3_Exercise_3_-_Parallel_ETL.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L3_Exercise_4_-_Table_Design.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L3_Exercise_4_-_Table_Design.ipynb -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/.gitignore: -------------------------------------------------------------------------------- 1 | dwh.cfg 2 | .ipynb_checkpoints 3 | __pycache__ 4 | data 5 | -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/README.md -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/analyze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/analyze.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_check_cluster_available.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_check_cluster_available.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_create_cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_create_cluster.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_destroy_cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/aws_destroy_cluster.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/create_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/create_tables.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/data-warehouse-project-der-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/data-warehouse-project-der-diagram.png -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/dwh.cfg.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/dwh.cfg.example -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/etl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/etl.py -------------------------------------------------------------------------------- /2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/sql_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/2-cloud-data-warehouses/L4_Project_-_Data_Warehouse/sql_queries.py -------------------------------------------------------------------------------- /3-data-lakes-with-spark/10_L4_Exercise_2_-_Advanced_Analytics_NLP.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/10_L4_Exercise_2_-_Advanced_Analytics_NLP.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/11_L4_Exercise_3_-_Data_Lake_on_S3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/11_L4_Exercise_3_-_Data_Lake_on_S3.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/1_procedural_vs_functional_in_python.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/1_procedural_vs_functional_in_python.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/2_spark_maps_and_lazy_evaluation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/2_spark_maps_and_lazy_evaluation.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/3_data_inputs_and_outputs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/3_data_inputs_and_outputs.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/4_data_wrangling.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/4_data_wrangling.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/5_dataframe_quiz.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/5_dataframe_quiz.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/7_data_wrangling-sql.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/7_data_wrangling-sql.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/8_spark_sql_quiz.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/8_spark_sql_quiz.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/9_L4_Exercise_1_-_Schema_On_Read.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/9_L4_Exercise_1_-_Schema_On_Read.ipynb -------------------------------------------------------------------------------- /3-data-lakes-with-spark/L4_Project/.gitignore: -------------------------------------------------------------------------------- 1 | dl.cfg -------------------------------------------------------------------------------- /3-data-lakes-with-spark/L4_Project/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/L4_Project/README.md -------------------------------------------------------------------------------- /3-data-lakes-with-spark/L4_Project/dl.cfg.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/L4_Project/dl.cfg.example -------------------------------------------------------------------------------- /3-data-lakes-with-spark/L4_Project/etl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/L4_Project/etl.py -------------------------------------------------------------------------------- /3-data-lakes-with-spark/data/sparkify_log_small.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/data/sparkify_log_small.json -------------------------------------------------------------------------------- /3-data-lakes-with-spark/mapreduce_practice.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/3-data-lakes-with-spark/mapreduce_practice.ipynb -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise1.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise2.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise3.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise4.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise5.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/exercise6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/exercise6.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L1_exercises/sql_statements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L1_exercises/sql_statements.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L2_exercises/exercise1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L2_exercises/exercise1.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L2_exercises/exercise2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L2_exercises/exercise2.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L2_exercises/exercise3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L2_exercises/exercise3.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L2_exercises/exercise4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L2_exercises/exercise4.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L2_exercises/sql_statements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L2_exercises/sql_statements.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/exercise1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/exercise1.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/exercise2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/exercise2.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/exercise3/dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/exercise3/dag.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/exercise3/subdag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/exercise3/subdag.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/exercise4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/exercise4.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/operators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/operators/__init__.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/operators/facts_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/operators/facts_calculator.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/operators/has_rows.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/operators/has_rows.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/operators/s3_to_redshift.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/operators/s3_to_redshift.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L3_exercises/sql_statements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L3_exercises/sql_statements.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/README.md -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/create_tables.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/create_tables.sql -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/dags/sparkify_analytical_tables_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/dags/sparkify_analytical_tables_dag.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/images/dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/images/dag.png -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/__init__.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/helpers/__init__.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/helpers/sql_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/helpers/sql_queries.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/operators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/operators/__init__.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/operators/data_quality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/operators/data_quality.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/operators/load_dimension.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/operators/load_dimension.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/operators/load_fact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/operators/load_fact.py -------------------------------------------------------------------------------- /4-data-pipelines-with-airflow/L4_project/plugins/operators/stage_redshift.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/4-data-pipelines-with-airflow/L4_project/plugins/operators/stage_redshift.py -------------------------------------------------------------------------------- /5-capstone-project/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/5-capstone-project/README.md -------------------------------------------------------------------------------- /5-capstone-project/datasets-exploration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/5-capstone-project/datasets-exploration.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/README.md -------------------------------------------------------------------------------- /explorations/nyc-taxi-challenge.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabfr/data-engineering-nanodegree/HEAD/explorations/nyc-taxi-challenge.ipynb --------------------------------------------------------------------------------