├── .gitignore ├── Makefile ├── README.md ├── adventureworks ├── __init__.py ├── ddl │ ├── create_bronze_tables.py │ ├── create_gold_tables.py │ ├── create_interface_views.py │ ├── create_silver_tables.py │ └── create_tables.py ├── great_expectations │ ├── .gitignore │ ├── checkpoints │ │ └── dq_checkpoint.yml │ ├── expectations │ │ ├── .ge_store_backend_id │ │ ├── customer.json │ │ ├── dim_customer.json │ │ ├── fct_orders.json │ │ ├── orders.json │ │ └── sales_mart.json │ ├── great_expectations.yml │ └── plugins │ │ └── custom_data_docs │ │ └── styles │ │ └── data_docs_custom_styles.css ├── pipelines │ ├── __init__.py │ ├── sales_mart.py │ └── utils │ │ ├── __init__.py │ │ └── create_fake_data.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── integration │ └── test_int_sales_mart.py │ └── unit │ └── test_sales_mart.py ├── assets └── images │ ├── cust_order.png │ └── data_flow.png ├── containers ├── airflow │ ├── Dockerfile │ └── requirements.txt └── spark │ ├── Dockerfile │ └── requirements.txt ├── docker-compose.yml ├── env └── logs └── scheduler └── latest /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/.gitignore -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/README.md -------------------------------------------------------------------------------- /adventureworks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /adventureworks/ddl/create_bronze_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/ddl/create_bronze_tables.py -------------------------------------------------------------------------------- /adventureworks/ddl/create_gold_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/ddl/create_gold_tables.py -------------------------------------------------------------------------------- /adventureworks/ddl/create_interface_views.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/ddl/create_interface_views.py -------------------------------------------------------------------------------- /adventureworks/ddl/create_silver_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/ddl/create_silver_tables.py -------------------------------------------------------------------------------- /adventureworks/ddl/create_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/ddl/create_tables.py -------------------------------------------------------------------------------- /adventureworks/great_expectations/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/.gitignore -------------------------------------------------------------------------------- /adventureworks/great_expectations/checkpoints/dq_checkpoint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/checkpoints/dq_checkpoint.yml -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/.ge_store_backend_id: -------------------------------------------------------------------------------- 1 | store_backend_id = f9438db4-cc90-4afa-bfad-1ff2a615495d 2 | -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/customer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/expectations/customer.json -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/dim_customer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/expectations/dim_customer.json -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/fct_orders.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/expectations/fct_orders.json -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/orders.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/expectations/orders.json -------------------------------------------------------------------------------- /adventureworks/great_expectations/expectations/sales_mart.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/expectations/sales_mart.json -------------------------------------------------------------------------------- /adventureworks/great_expectations/great_expectations.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/great_expectations.yml -------------------------------------------------------------------------------- /adventureworks/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css -------------------------------------------------------------------------------- /adventureworks/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /adventureworks/pipelines/sales_mart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/pipelines/sales_mart.py -------------------------------------------------------------------------------- /adventureworks/pipelines/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /adventureworks/pipelines/utils/create_fake_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/pipelines/utils/create_fake_data.py -------------------------------------------------------------------------------- /adventureworks/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /adventureworks/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/tests/conftest.py -------------------------------------------------------------------------------- /adventureworks/tests/integration/test_int_sales_mart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/tests/integration/test_int_sales_mart.py -------------------------------------------------------------------------------- /adventureworks/tests/unit/test_sales_mart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/adventureworks/tests/unit/test_sales_mart.py -------------------------------------------------------------------------------- /assets/images/cust_order.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/assets/images/cust_order.png -------------------------------------------------------------------------------- /assets/images/data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/assets/images/data_flow.png -------------------------------------------------------------------------------- /containers/airflow/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/containers/airflow/Dockerfile -------------------------------------------------------------------------------- /containers/airflow/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/containers/airflow/requirements.txt -------------------------------------------------------------------------------- /containers/spark/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/containers/spark/Dockerfile -------------------------------------------------------------------------------- /containers/spark/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/containers/spark/requirements.txt -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/data_engineering_best_practices/HEAD/env -------------------------------------------------------------------------------- /logs/scheduler/latest: -------------------------------------------------------------------------------- 1 | /opt/airflow/logs/scheduler/2024-02-23 --------------------------------------------------------------------------------