├── .devcontainer └── devcontainer.json ├── .github ├── CODEOWNERS └── workflows │ ├── cd.yml │ └── ci.yml ├── .gitignore ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── answers-setup-data-project.ipynb ├── assets ├── images │ ├── airflow.png │ ├── cs3.png │ ├── dagdep.png │ ├── data-tools.png │ ├── data_flow.png │ ├── dbtps.png │ ├── dep-arch.png │ ├── docker.png │ ├── duckdb.png │ ├── folder.png │ ├── inst.png │ ├── pytest.png │ ├── tpch_erd.png │ └── wap.png └── videos │ └── perms.mp4 ├── containers └── airflow │ ├── Dockerfile │ ├── quarto.sh │ └── requirements.txt ├── dags ├── __init__.py ├── etl │ ├── __init__.py │ ├── bronze │ │ ├── customer.py │ │ ├── lineitem.py │ │ ├── nation.py │ │ ├── orders.py │ │ └── region.py │ ├── gold │ │ ├── obt │ │ │ ├── wide_lineitem.py │ │ │ └── wide_orders.py │ │ └── pre_aggregated │ │ │ └── customer_outreach_metrics.py │ └── silver │ │ ├── dim_customer.py │ │ ├── fct_lineitem.py │ │ └── fct_orders.py ├── run_pipeline.py ├── tests │ ├── __init__.py │ └── unit │ │ ├── __init__.py │ │ └── test_dim_customer.py ├── tpch_etl.py └── utils │ └── metadata.py ├── docker-compose.yml ├── logs └── scheduler │ └── latest ├── requirements.txt ├── setup-data-project.ipynb ├── setup ├── create_input_data.py └── ec2 │ └── init.sh └── terraform ├── main.tf ├── output.tf └── variable.tf /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "hostRequirements": { 4 | "cpus": 2 5 | }, 6 | "waitFor": "onCreateCommand", 7 | "updateContentCommand": "python3 -m pip install -r requirements.txt", 8 | "postCreateCommand": "", 9 | "customizations": { 10 | "codespaces": { 11 | "openFiles": [] 12 | }, 13 | "vscode": { 14 | "extensions": [ 15 | "ms-toolsai.jupyter", 16 | "ms-python.python" 17 | ] 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- 
/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This is a comment. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in 5 | # the repo. Unless a later match takes precedence, 6 | # @josephmachado will be requested for 7 | # review when someone opens a pull request. 8 | * @josephmachado 9 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: CD 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | deploy-to-ec2: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: checkout repo 11 | uses: actions/checkout@v2 12 | - name: Deploy to server 13 | uses: easingthemes/ssh-deploy@main 14 | env: 15 | SSH_PRIVATE_KEY: ${{ secrets.SERVER_SSH_KEY }} 16 | REMOTE_HOST: ${{ secrets.REMOTE_HOST }} 17 | REMOTE_USER: ${{ secrets.REMOTE_USER }} 18 | SOURCE: "./" 19 | TARGET: "/home/ubuntu/de_project" 20 | ARGS: "--rsync-path='sudo rsync' -avz" 21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [pull_request] 3 | jobs: 4 | run-ci-tests: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: checkout repo 8 | uses: actions/checkout@v2 9 | - name: Grant perms 10 | run: make perms 11 | - name: Spin up containers 12 | run: make up 13 | - name: Run CI test 14 | run: make ci 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 
| dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | tpch.db 165 | metadata.db 166 | example.db 167 | 168 | .ipynb_checkpoints/ 169 | 170 | 171 | ###### TERRAFORM IGNORE 172 | # Local .terraform directories 173 | **/.terraform/* 174 | 175 | # .tfstate files 176 | *.tfstate 177 | *.tfstate.* 178 | 179 | # Crash log files 180 | crash.log 181 | crash.*.log 182 | 183 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 184 | # password, private keys, and other secrets. These should not be part of version 185 | # control as they are data points which are potentially sensitive and subject 186 | # to change depending on the environment. 187 | *.tfvars 188 | *.tfvars.json 189 | 190 | # Ignore override files as they are usually used to override resources locally and so 191 | # are not checked in 192 | override.tf 193 | override.tf.json 194 | *_override.tf 195 | *_override.tf.json 196 | 197 | # Include override files you do wish to add to version control using negated pattern 198 | # !example_override.tf 199 | 200 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 201 | # example: *tfplan* 202 | 203 | # Ignore CLI configuration files 204 | .terraformrc 205 | terraform.rc 206 | Footer 207 | 208 | *.pem 209 | .terraform/ 210 | .terraform.lock.hcl 211 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #################################################################################################################### 2 | # Setup containers to run Airflow 3 | 4 | docker-spin-up: 5 | docker compose build && docker compose up airflow-init && docker compose up --build -d 6 | 7 | perms: 8 | sudo mkdir -p logs plugins temp dags tests data visualization && sudo chmod -R u=rwx,g=rwx,o=rwx logs plugins temp dags tests data visualization 9 | 10 | do-sleep: 11 | sleep 30 12 | 13 | create-data: 14 | docker exec scheduler 
python /opt/airflow/setup/create_input_data.py 15 | 16 | up: perms docker-spin-up do-sleep create-data 17 | 18 | down: 19 | docker compose down 20 | 21 | restart: down up 22 | 23 | sh: 24 | docker exec -ti webserver bash 25 | 26 | #################################################################################################################### 27 | # Testing, auto formatting, type checks, & Lint checks 28 | pytest: 29 | docker exec webserver pytest -p no:warnings -v /opt/airflow/dags/tests 30 | 31 | format: 32 | docker exec webserver python -m black -S --line-length 79 . 33 | 34 | isort: 35 | docker exec webserver isort . 36 | 37 | type: 38 | docker exec webserver mypy --ignore-missing-imports /opt/airflow 39 | 40 | lint: 41 | docker exec webserver flake8 --exclude=.ipynb_checkpoints /opt/airflow/dags 42 | 43 | ci: isort format type lint pytest 44 | 45 | #################################################################################################################### 46 | # Helpers 47 | # ssh-ec2: write the terraform-output private key to private_key.pem, SSH into the EC2 host, then ALWAYS delete the key (even if ssh exits non-zero) and return the original exit status. 48 | ssh-ec2: 49 | terraform -chdir=./terraform output -raw private_key > private_key.pem && chmod 600 private_key.pem && ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i private_key.pem ubuntu@$$(terraform -chdir=./terraform output -raw ec2_public_dns); status=$$?; rm -f private_key.pem; exit $$status 50 | 51 | 52 | # cloud-airflow: forward local port 8080 to port 8080 on the EC2 host in the background (-N -f), open http://localhost:8080, then ALWAYS delete the key (even if ssh or open fails) and return the original exit status. 53 | cloud-airflow: 54 | terraform -chdir=./terraform output -raw private_key > private_key.pem && chmod 600 private_key.pem && ssh -o "IdentitiesOnly yes" -i private_key.pem ubuntu@$$(terraform -chdir=./terraform output -raw ec2_public_dns) -N -f -L 8080:$$(terraform -chdir=./terraform output -raw ec2_public_dns):8080 && open http://localhost:8080; status=$$?; rm -f private_key.pem; exit $$status 55 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | polars = "*" 8 | cuallee = "*" 
9 | pytest = "*" 10 | jupyterlab = "*" 11 | duckdb = "*" 12 | pyarrow = "*" 13 | black = "*" 14 | 15 | [dev-packages] 16 | 17 | [requires] 18 | python_version = "3.12" 19 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "5cf3216bbe9142285582c9db237fa93253dc6417ae031ae4b8eee8b35d3ef6c0" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.12" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "anyio": { 20 | "hashes": [ 21 | "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94", 22 | "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7" 23 | ], 24 | "markers": "python_version >= '3.8'", 25 | "version": "==4.4.0" 26 | }, 27 | "argon2-cffi": { 28 | "hashes": [ 29 | "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", 30 | "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea" 31 | ], 32 | "markers": "python_version >= '3.7'", 33 | "version": "==23.1.0" 34 | }, 35 | "argon2-cffi-bindings": { 36 | "hashes": [ 37 | "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670", 38 | "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f", 39 | "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583", 40 | "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194", 41 | "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c", 42 | "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a", 43 | "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", 44 | "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5", 45 | 
"sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", 46 | "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7", 47 | "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", 48 | "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", 49 | "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", 50 | "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", 51 | "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", 52 | "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", 53 | "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d", 54 | "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", 55 | "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb", 56 | "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", 57 | "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351" 58 | ], 59 | "markers": "python_version >= '3.6'", 60 | "version": "==21.2.0" 61 | }, 62 | "arrow": { 63 | "hashes": [ 64 | "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", 65 | "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85" 66 | ], 67 | "markers": "python_version >= '3.8'", 68 | "version": "==1.3.0" 69 | }, 70 | "asttokens": { 71 | "hashes": [ 72 | "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", 73 | "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0" 74 | ], 75 | "version": "==2.4.1" 76 | }, 77 | "async-lru": { 78 | "hashes": [ 79 | "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627", 80 | "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224" 81 | ], 82 | "markers": "python_version >= '3.8'", 83 | "version": "==2.0.4" 84 | }, 85 | "attrs": { 86 | "hashes": [ 87 | 
"sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346", 88 | "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2" 89 | ], 90 | "markers": "python_version >= '3.7'", 91 | "version": "==24.2.0" 92 | }, 93 | "babel": { 94 | "hashes": [ 95 | "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", 96 | "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316" 97 | ], 98 | "markers": "python_version >= '3.8'", 99 | "version": "==2.16.0" 100 | }, 101 | "beautifulsoup4": { 102 | "hashes": [ 103 | "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", 104 | "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" 105 | ], 106 | "markers": "python_full_version >= '3.6.0'", 107 | "version": "==4.12.3" 108 | }, 109 | "black": { 110 | "hashes": [ 111 | "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6", 112 | "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e", 113 | "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f", 114 | "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018", 115 | "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e", 116 | "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd", 117 | "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4", 118 | "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed", 119 | "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2", 120 | "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42", 121 | "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af", 122 | "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb", 123 | "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368", 124 | "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb", 125 | 
"sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af", 126 | "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed", 127 | "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47", 128 | "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2", 129 | "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a", 130 | "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c", 131 | "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920", 132 | "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1" 133 | ], 134 | "index": "pypi", 135 | "markers": "python_version >= '3.8'", 136 | "version": "==24.8.0" 137 | }, 138 | "bleach": { 139 | "hashes": [ 140 | "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe", 141 | "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6" 142 | ], 143 | "markers": "python_version >= '3.8'", 144 | "version": "==6.1.0" 145 | }, 146 | "certifi": { 147 | "hashes": [ 148 | "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", 149 | "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9" 150 | ], 151 | "markers": "python_version >= '3.6'", 152 | "version": "==2024.8.30" 153 | }, 154 | "cffi": { 155 | "hashes": [ 156 | "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", 157 | "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", 158 | "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", 159 | "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", 160 | "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", 161 | "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", 162 | "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", 163 | 
"sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", 164 | "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", 165 | "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", 166 | "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", 167 | "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", 168 | "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", 169 | "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", 170 | "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", 171 | "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", 172 | "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", 173 | "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", 174 | "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", 175 | "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", 176 | "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", 177 | "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", 178 | "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", 179 | "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", 180 | "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", 181 | "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", 182 | "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", 183 | "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", 184 | "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", 185 | "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", 186 | "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", 187 | 
"sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", 188 | "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", 189 | "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", 190 | "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", 191 | "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", 192 | "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", 193 | "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", 194 | "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", 195 | "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", 196 | "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", 197 | "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", 198 | "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", 199 | "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", 200 | "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", 201 | "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", 202 | "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", 203 | "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", 204 | "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", 205 | "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", 206 | "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", 207 | "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", 208 | "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", 209 | "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", 210 | "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", 211 | 
"sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", 212 | "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", 213 | "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", 214 | "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", 215 | "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", 216 | "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", 217 | "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", 218 | "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", 219 | "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", 220 | "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", 221 | "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", 222 | "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b" 223 | ], 224 | "markers": "python_version >= '3.8'", 225 | "version": "==1.17.1" 226 | }, 227 | "charset-normalizer": { 228 | "hashes": [ 229 | "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", 230 | "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", 231 | "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", 232 | "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", 233 | "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", 234 | "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", 235 | "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", 236 | "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", 237 | "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", 238 | "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", 239 | "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", 240 | 
"sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", 241 | "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", 242 | "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", 243 | "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", 244 | "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", 245 | "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", 246 | "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", 247 | "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", 248 | "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", 249 | "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", 250 | "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", 251 | "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", 252 | "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", 253 | "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", 254 | "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", 255 | "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", 256 | "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", 257 | "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", 258 | "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", 259 | "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", 260 | "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", 261 | "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", 262 | "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", 263 | "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", 264 | 
"sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", 265 | "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", 266 | "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", 267 | "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", 268 | "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", 269 | "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", 270 | "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", 271 | "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", 272 | "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", 273 | "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", 274 | "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", 275 | "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", 276 | "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", 277 | "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", 278 | "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", 279 | "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", 280 | "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", 281 | "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", 282 | "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", 283 | "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", 284 | "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", 285 | "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", 286 | "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", 287 | "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", 288 | 
"sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", 289 | "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", 290 | "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", 291 | "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", 292 | "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", 293 | "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", 294 | "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", 295 | "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", 296 | "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", 297 | "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", 298 | "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", 299 | "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", 300 | "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", 301 | "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", 302 | "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", 303 | "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", 304 | "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", 305 | "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", 306 | "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", 307 | "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", 308 | "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", 309 | "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", 310 | "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", 311 | "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", 312 | 
"sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", 313 | "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", 314 | "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", 315 | "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", 316 | "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", 317 | "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", 318 | "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" 319 | ], 320 | "markers": "python_full_version >= '3.7.0'", 321 | "version": "==3.3.2" 322 | }, 323 | "click": { 324 | "hashes": [ 325 | "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", 326 | "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" 327 | ], 328 | "markers": "python_version >= '3.7'", 329 | "version": "==8.1.7" 330 | }, 331 | "comm": { 332 | "hashes": [ 333 | "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e", 334 | "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3" 335 | ], 336 | "markers": "python_version >= '3.8'", 337 | "version": "==0.2.2" 338 | }, 339 | "cuallee": { 340 | "hashes": [ 341 | "sha256:9f8bb23f3153870a901f92d45ebd24df2d12fcb6a5446f5ce741cd4286a40723", 342 | "sha256:b83a67836aa8966194f51335fe5f3802519c97040dbdb1709bc2e7c094028332" 343 | ], 344 | "index": "pypi", 345 | "markers": "python_version >= '3.10'", 346 | "version": "==0.14.1" 347 | }, 348 | "debugpy": { 349 | "hashes": [ 350 | "sha256:0a1029a2869d01cb777216af8c53cda0476875ef02a2b6ff8b2f2c9a4b04176c", 351 | "sha256:1cd04a73eb2769eb0bfe43f5bfde1215c5923d6924b9b90f94d15f207a402226", 352 | "sha256:28ced650c974aaf179231668a293ecd5c63c0a671ae6d56b8795ecc5d2f48d3c", 353 | "sha256:345d6a0206e81eb68b1493ce2fbffd57c3088e2ce4b46592077a943d2b968ca3", 354 | "sha256:3df6692351172a42af7558daa5019651f898fc67450bf091335aa8a18fbf6f3a", 355 | 
"sha256:4413b7a3ede757dc33a273a17d685ea2b0c09dbd312cc03f5534a0fd4d40750a", 356 | "sha256:4fbb3b39ae1aa3e5ad578f37a48a7a303dad9a3d018d369bc9ec629c1cfa7408", 357 | "sha256:55919dce65b471eff25901acf82d328bbd5b833526b6c1364bd5133754777a44", 358 | "sha256:5b5c770977c8ec6c40c60d6f58cacc7f7fe5a45960363d6974ddb9b62dbee156", 359 | "sha256:606bccba19f7188b6ea9579c8a4f5a5364ecd0bf5a0659c8a5d0e10dcee3032a", 360 | "sha256:7b0fe36ed9d26cb6836b0a51453653f8f2e347ba7348f2bbfe76bfeb670bfb1c", 361 | "sha256:7e4d594367d6407a120b76bdaa03886e9eb652c05ba7f87e37418426ad2079f7", 362 | "sha256:8f913ee8e9fcf9d38a751f56e6de12a297ae7832749d35de26d960f14280750a", 363 | "sha256:a697beca97dad3780b89a7fb525d5e79f33821a8bc0c06faf1f1289e549743cf", 364 | "sha256:ad84b7cde7fd96cf6eea34ff6c4a1b7887e0fe2ea46e099e53234856f9d99a34", 365 | "sha256:b2112cfeb34b4507399d298fe7023a16656fc553ed5246536060ca7bd0e668d0", 366 | "sha256:b78c1250441ce893cb5035dd6f5fc12db968cc07f91cc06996b2087f7cefdd8e", 367 | "sha256:c0a65b00b7cdd2ee0c2cf4c7335fef31e15f1b7056c7fdbce9e90193e1a8c8cb", 368 | "sha256:c9f7c15ea1da18d2fcc2709e9f3d6de98b69a5b0fff1807fb80bc55f906691f7", 369 | "sha256:db9fb642938a7a609a6c865c32ecd0d795d56c1aaa7a7a5722d77855d5e77f2b", 370 | "sha256:dd3811bd63632bb25eda6bd73bea8e0521794cda02be41fa3160eb26fc29e7ed", 371 | "sha256:e84c276489e141ed0b93b0af648eef891546143d6a48f610945416453a8ad406" 372 | ], 373 | "markers": "python_version >= '3.8'", 374 | "version": "==1.8.5" 375 | }, 376 | "decorator": { 377 | "hashes": [ 378 | "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", 379 | "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" 380 | ], 381 | "markers": "python_version >= '3.5'", 382 | "version": "==5.1.1" 383 | }, 384 | "defusedxml": { 385 | "hashes": [ 386 | "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", 387 | "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61" 388 | ], 389 | "markers": "python_version >= '2.7' 
and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 390 | "version": "==0.7.1" 391 | }, 392 | "duckdb": { 393 | "hashes": [ 394 | "sha256:069fb7bca459e31edb32a61f0eea95d7a8a766bef7b8318072563abf8e939593", 395 | "sha256:0e3644b1f034012d82b9baa12a7ea306fe71dc6623731b28c753c4a617ff9499", 396 | "sha256:11ec967b67159361ceade34095796a8d19368ea5c30cad988f44896b082b0816", 397 | "sha256:16243e66a9fd0e64ee265f2634d137adc6593f54ddf3ef55cb8a29e1decf6e54", 398 | "sha256:1f3aea31341ce400640dd522e4399b941f66df17e39884f446638fe958d6117c", 399 | "sha256:211a33c1ddb5cc609f75eb43772b0b03b45d2fa89bec107e4715267ca907806a", 400 | "sha256:23fc9aa0af74e3803ed90c8d98280fd5bcac8c940592bf6288e8fd60fb051d00", 401 | "sha256:29dc18087de47563b3859a6b98bbed96e1c96ce5db829646dc3b16a916997e7d", 402 | "sha256:3da30b7b466f710d52caa1fdc3ef0bf4176ad7f115953cd9f8b0fbf0f723778f", 403 | "sha256:3db4ab31c20de4edaef152930836b38e7662cd71370748fdf2c38ba9cf854dc4", 404 | "sha256:42b910a149e00f40a1766dc74fa309d4255b912a5d2fdcc387287658048650f6", 405 | "sha256:47849d546dc4238c0f20e95fe53b621aa5b08684e68fff91fd84a7092be91a17", 406 | "sha256:4e1c3414f7fd01f4810dc8b335deffc91933a159282d65fef11c1286bc0ded04", 407 | "sha256:510b5885ed6c267b9c0e1e7c6138fdffc2dd6f934a5a95b76da85da127213338", 408 | "sha256:53825a63193c582a78c152ea53de8d145744ddbeea18f452625a82ebc33eb14a", 409 | "sha256:55ef98bcc7ba745752607f1b926e8d9b7ce32c42c423bbad10c44820aefe23a7", 410 | "sha256:58f1633dd2c5af5088ae2d119418e200855d0699d84f2fae9d46d30f404bcead", 411 | "sha256:5e4cbc408e6e41146dea89b9044dae7356e353db0c96b183e5583ee02bc6ae5d", 412 | "sha256:61fb838da51e07ceb0222c4406b059b90e10efcc453c19a3650b73c0112138c4", 413 | "sha256:6370ae27ec8167ccfbefb94f58ad9fdc7bac142399960549d6d367f233189868", 414 | "sha256:64bf2a6e23840d662bd2ac09206a9bd4fa657418884d69e5c352d4456dc70b3c", 415 | "sha256:655df442ceebfc6f3fd6c8766e04b60d44dddedfa90275d794f9fab2d3180879", 416 | "sha256:657bc7ac64d5faf069a782ae73afac51ef30ae2e5d0e09ce6a09d03db84ab35e", 417 | 
"sha256:6e183729bb64be7798ccbfda6283ebf423c869268c25af2b56929e48f763be2f", 418 | "sha256:7807e2f0d3344668e433f0dc1f54bfaddd410589611393e9a7ed56f8dec9514f", 419 | "sha256:78a4510f82431ee3f14db689fe8727a4a9062c8f2fbb3bcfe3bfad3c1a198004", 420 | "sha256:89f3de8cba57d19b41cd3c47dd06d979bd2a2ffead115480e37afbe72b02896d", 421 | "sha256:8e74b6f8a5145abbf7e6c1a2a61f0adbcd493c19b358f524ec9a3cebdf362abb", 422 | "sha256:aac2fcabe2d5072c252d0b3087365f431de812d8199705089fb073e4d039d19c", 423 | "sha256:aad02f50d5a2020822d1638fc1a9bcf082056f11d2e15ccfc1c1ed4d0f85a3be", 424 | "sha256:b4d4c12b1f98732151bd31377753e0da1a20f6423016d2d097d2e31953ec7c23", 425 | "sha256:b9b6a77ef0183f561b1fc2945fcc762a71570ffd33fea4e3a855d413ed596fe4", 426 | "sha256:bd11bc899cebf5ff936d1276a2dfb7b7db08aba3bcc42924afeafc2163bddb43", 427 | "sha256:c6bc2a58689adf5520303c5f68b065b9f980bd31f1366c541b8c7490abaf55cd", 428 | "sha256:cd9fb1408942411ad360f8414bc3fbf0091c396ca903d947a10f2e31324d5cbd", 429 | "sha256:d02be208d2885ca085d4c852b911493b8cdac9d6eae893259da32bd72a437c25", 430 | "sha256:d18caea926b1e301c29b140418fca697aad728129e269b4f82c2795a184549e1", 431 | "sha256:d8333f3e85fa2a0f1c222b752c2bd42ea875235ff88492f7bcbb6867d0f644eb", 432 | "sha256:d86a6926313913cd2cc7e08816d3e7f72ba340adf2959279b1a80058be6526d9", 433 | "sha256:d89eaaa5df8a57e7d2bc1f4c46493bb1fee319a00155f2015810ad2ace6570ae", 434 | "sha256:e2a08175e43b865c1e9611efd18cacd29ddd69093de442b1ebdf312071df7719", 435 | "sha256:e39f9b7b62e64e10d421ff04480290a70129c38067d1a4f600e9212b10542c5a", 436 | "sha256:e3b6b4fe1edfe35f64f403a9f0ab75258cee35abd964356893ee37424174b7e4", 437 | "sha256:eb66e9e7391801928ea134dcab12d2e4c97f2ce0391c603a3e480bbb15830bc8", 438 | "sha256:ecb19319883564237a7a03a104dbe7f445e73519bb67108fcab3d19b6b91fe30", 439 | "sha256:f6486323ab20656d22ffa8f3c6e109dde30d0b327b7c831f22ebcfe747f97fb0" 440 | ], 441 | "index": "pypi", 442 | "markers": "python_full_version >= '3.7.0'", 443 | "version": "==1.1.0" 444 | }, 445 | "executing": { 446 
| "hashes": [ 447 | "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", 448 | "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab" 449 | ], 450 | "markers": "python_version >= '3.8'", 451 | "version": "==2.1.0" 452 | }, 453 | "fastjsonschema": { 454 | "hashes": [ 455 | "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23", 456 | "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a" 457 | ], 458 | "version": "==2.20.0" 459 | }, 460 | "fqdn": { 461 | "hashes": [ 462 | "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", 463 | "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014" 464 | ], 465 | "version": "==1.5.1" 466 | }, 467 | "h11": { 468 | "hashes": [ 469 | "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", 470 | "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" 471 | ], 472 | "markers": "python_version >= '3.7'", 473 | "version": "==0.14.0" 474 | }, 475 | "httpcore": { 476 | "hashes": [ 477 | "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61", 478 | "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5" 479 | ], 480 | "markers": "python_version >= '3.8'", 481 | "version": "==1.0.5" 482 | }, 483 | "httpx": { 484 | "hashes": [ 485 | "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", 486 | "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2" 487 | ], 488 | "markers": "python_version >= '3.8'", 489 | "version": "==0.27.2" 490 | }, 491 | "idna": { 492 | "hashes": [ 493 | "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", 494 | "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" 495 | ], 496 | "markers": "python_version >= '3.6'", 497 | "version": "==3.10" 498 | }, 499 | "iniconfig": { 500 | "hashes": [ 501 | 
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", 502 | "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" 503 | ], 504 | "markers": "python_version >= '3.7'", 505 | "version": "==2.0.0" 506 | }, 507 | "ipykernel": { 508 | "hashes": [ 509 | "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5", 510 | "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215" 511 | ], 512 | "markers": "python_version >= '3.8'", 513 | "version": "==6.29.5" 514 | }, 515 | "ipython": { 516 | "hashes": [ 517 | "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e", 518 | "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c" 519 | ], 520 | "markers": "python_version >= '3.10'", 521 | "version": "==8.27.0" 522 | }, 523 | "isoduration": { 524 | "hashes": [ 525 | "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", 526 | "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042" 527 | ], 528 | "version": "==20.11.0" 529 | }, 530 | "jedi": { 531 | "hashes": [ 532 | "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd", 533 | "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0" 534 | ], 535 | "markers": "python_version >= '3.6'", 536 | "version": "==0.19.1" 537 | }, 538 | "jinja2": { 539 | "hashes": [ 540 | "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", 541 | "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d" 542 | ], 543 | "markers": "python_version >= '3.7'", 544 | "version": "==3.1.4" 545 | }, 546 | "json5": { 547 | "hashes": [ 548 | "sha256:34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f", 549 | "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae" 550 | ], 551 | "markers": "python_version >= '3.8'", 552 | "version": "==0.9.25" 553 | }, 554 | "jsonpointer": { 555 | "hashes": [ 556 | 
"sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", 557 | "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef" 558 | ], 559 | "version": "==3.0.0" 560 | }, 561 | "jsonschema": { 562 | "extras": [ 563 | "format-nongpl" 564 | ], 565 | "hashes": [ 566 | "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", 567 | "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566" 568 | ], 569 | "markers": "python_version >= '3.8'", 570 | "version": "==4.23.0" 571 | }, 572 | "jsonschema-specifications": { 573 | "hashes": [ 574 | "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc", 575 | "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c" 576 | ], 577 | "markers": "python_version >= '3.8'", 578 | "version": "==2023.12.1" 579 | }, 580 | "jupyter-client": { 581 | "hashes": [ 582 | "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", 583 | "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f" 584 | ], 585 | "markers": "python_version >= '3.8'", 586 | "version": "==8.6.3" 587 | }, 588 | "jupyter-core": { 589 | "hashes": [ 590 | "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", 591 | "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9" 592 | ], 593 | "markers": "python_version >= '3.8'", 594 | "version": "==5.7.2" 595 | }, 596 | "jupyter-events": { 597 | "hashes": [ 598 | "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960", 599 | "sha256:670b8229d3cc882ec782144ed22e0d29e1c2d639263f92ca8383e66682845e22" 600 | ], 601 | "markers": "python_version >= '3.8'", 602 | "version": "==0.10.0" 603 | }, 604 | "jupyter-lsp": { 605 | "hashes": [ 606 | "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da", 607 | "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001" 608 | ], 609 | "markers": "python_version >= '3.8'", 610 | 
"version": "==2.2.5" 611 | }, 612 | "jupyter-server": { 613 | "hashes": [ 614 | "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd", 615 | "sha256:66095021aa9638ced276c248b1d81862e4c50f292d575920bbe960de1c56b12b" 616 | ], 617 | "markers": "python_version >= '3.8'", 618 | "version": "==2.14.2" 619 | }, 620 | "jupyter-server-terminals": { 621 | "hashes": [ 622 | "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa", 623 | "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269" 624 | ], 625 | "markers": "python_version >= '3.8'", 626 | "version": "==0.5.3" 627 | }, 628 | "jupyterlab": { 629 | "hashes": [ 630 | "sha256:73b6e0775d41a9fee7ee756c80f58a6bed4040869ccc21411dc559818874d321", 631 | "sha256:ae7f3a1b8cb88b4f55009ce79fa7c06f99d70cd63601ee4aa91815d054f46f75" 632 | ], 633 | "index": "pypi", 634 | "markers": "python_version >= '3.8'", 635 | "version": "==4.2.5" 636 | }, 637 | "jupyterlab-pygments": { 638 | "hashes": [ 639 | "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", 640 | "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780" 641 | ], 642 | "markers": "python_version >= '3.8'", 643 | "version": "==0.3.0" 644 | }, 645 | "jupyterlab-server": { 646 | "hashes": [ 647 | "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4", 648 | "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4" 649 | ], 650 | "markers": "python_version >= '3.8'", 651 | "version": "==2.27.3" 652 | }, 653 | "markupsafe": { 654 | "hashes": [ 655 | "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", 656 | "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", 657 | "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", 658 | "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", 659 | "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", 660 | 
"sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", 661 | "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", 662 | "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", 663 | "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", 664 | "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", 665 | "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", 666 | "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", 667 | "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", 668 | "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", 669 | "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", 670 | "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", 671 | "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", 672 | "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", 673 | "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", 674 | "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", 675 | "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", 676 | "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", 677 | "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", 678 | "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", 679 | "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", 680 | "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", 681 | "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", 682 | "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", 683 | "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", 684 | 
"sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", 685 | "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", 686 | "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", 687 | "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", 688 | "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", 689 | "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", 690 | "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", 691 | "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", 692 | "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", 693 | "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", 694 | "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", 695 | "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", 696 | "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", 697 | "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", 698 | "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", 699 | "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", 700 | "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", 701 | "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", 702 | "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", 703 | "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", 704 | "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", 705 | "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", 706 | "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", 707 | "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", 708 | 
"sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", 709 | "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", 710 | "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", 711 | "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", 712 | "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", 713 | "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", 714 | "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" 715 | ], 716 | "markers": "python_version >= '3.7'", 717 | "version": "==2.1.5" 718 | }, 719 | "matplotlib-inline": { 720 | "hashes": [ 721 | "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", 722 | "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca" 723 | ], 724 | "markers": "python_version >= '3.8'", 725 | "version": "==0.1.7" 726 | }, 727 | "mistune": { 728 | "hashes": [ 729 | "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", 730 | "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8" 731 | ], 732 | "markers": "python_version >= '3.7'", 733 | "version": "==3.0.2" 734 | }, 735 | "mypy-extensions": { 736 | "hashes": [ 737 | "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", 738 | "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" 739 | ], 740 | "markers": "python_version >= '3.5'", 741 | "version": "==1.0.0" 742 | }, 743 | "nbclient": { 744 | "hashes": [ 745 | "sha256:4b3f1b7dba531e498449c4db4f53da339c91d449dc11e9af3a43b4eb5c5abb09", 746 | "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f" 747 | ], 748 | "markers": "python_full_version >= '3.8.0'", 749 | "version": "==0.10.0" 750 | }, 751 | "nbconvert": { 752 | "hashes": [ 753 | "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3", 754 | 
"sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4" 755 | ], 756 | "markers": "python_version >= '3.8'", 757 | "version": "==7.16.4" 758 | }, 759 | "nbformat": { 760 | "hashes": [ 761 | "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", 762 | "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b" 763 | ], 764 | "markers": "python_version >= '3.8'", 765 | "version": "==5.10.4" 766 | }, 767 | "nest-asyncio": { 768 | "hashes": [ 769 | "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", 770 | "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c" 771 | ], 772 | "markers": "python_version >= '3.5'", 773 | "version": "==1.6.0" 774 | }, 775 | "notebook-shim": { 776 | "hashes": [ 777 | "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef", 778 | "sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb" 779 | ], 780 | "markers": "python_version >= '3.7'", 781 | "version": "==0.2.4" 782 | }, 783 | "numpy": { 784 | "hashes": [ 785 | "sha256:046356b19d7ad1890c751b99acad5e82dc4a02232013bd9a9a712fddf8eb60f5", 786 | "sha256:0b8cc2715a84b7c3b161f9ebbd942740aaed913584cae9cdc7f8ad5ad41943d0", 787 | "sha256:0d07841fd284718feffe7dd17a63a2e6c78679b2d386d3e82f44f0108c905550", 788 | "sha256:13cc11c00000848702322af4de0147ced365c81d66053a67c2e962a485b3717c", 789 | "sha256:13ce49a34c44b6de5241f0b38b07e44c1b2dcacd9e36c30f9c2fcb1bb5135db7", 790 | "sha256:24c2ad697bd8593887b019817ddd9974a7f429c14a5469d7fad413f28340a6d2", 791 | "sha256:251105b7c42abe40e3a689881e1793370cc9724ad50d64b30b358bbb3a97553b", 792 | "sha256:2ca4b53e1e0b279142113b8c5eb7d7a877e967c306edc34f3b58e9be12fda8df", 793 | "sha256:3269c9eb8745e8d975980b3a7411a98976824e1fdef11f0aacf76147f662b15f", 794 | "sha256:397bc5ce62d3fb73f304bec332171535c187e0643e176a6e9421a6e3eacef06d", 795 | "sha256:3fc5eabfc720db95d68e6646e88f8b399bfedd235994016351b1d9e062c4b270", 796 | 
"sha256:50a95ca3560a6058d6ea91d4629a83a897ee27c00630aed9d933dff191f170cd", 797 | "sha256:52ac2e48f5ad847cd43c4755520a2317f3380213493b9d8a4c5e37f3b87df504", 798 | "sha256:53e27293b3a2b661c03f79aa51c3987492bd4641ef933e366e0f9f6c9bf257ec", 799 | "sha256:57eb525e7c2a8fdee02d731f647146ff54ea8c973364f3b850069ffb42799647", 800 | "sha256:5889dd24f03ca5a5b1e8a90a33b5a0846d8977565e4ae003a63d22ecddf6782f", 801 | "sha256:59ca673ad11d4b84ceb385290ed0ebe60266e356641428c845b39cd9df6713ab", 802 | "sha256:6435c48250c12f001920f0751fe50c0348f5f240852cfddc5e2f97e007544cbe", 803 | "sha256:6e5a9cb2be39350ae6c8f79410744e80154df658d5bea06e06e0ac5bb75480d5", 804 | "sha256:7be6a07520b88214ea85d8ac8b7d6d8a1839b0b5cb87412ac9f49fa934eb15d5", 805 | "sha256:7c803b7934a7f59563db459292e6aa078bb38b7ab1446ca38dd138646a38203e", 806 | "sha256:7dd86dfaf7c900c0bbdcb8b16e2f6ddf1eb1fe39c6c8cca6e94844ed3152a8fd", 807 | "sha256:8661c94e3aad18e1ea17a11f60f843a4933ccaf1a25a7c6a9182af70610b2313", 808 | "sha256:8ae0fd135e0b157365ac7cc31fff27f07a5572bdfc38f9c2d43b2aff416cc8b0", 809 | "sha256:910b47a6d0635ec1bd53b88f86120a52bf56dcc27b51f18c7b4a2e2224c29f0f", 810 | "sha256:913cc1d311060b1d409e609947fa1b9753701dac96e6581b58afc36b7ee35af6", 811 | "sha256:920b0911bb2e4414c50e55bd658baeb78281a47feeb064ab40c2b66ecba85553", 812 | "sha256:950802d17a33c07cba7fd7c3dcfa7d64705509206be1606f196d179e539111ed", 813 | "sha256:981707f6b31b59c0c24bcda52e5605f9701cb46da4b86c2e8023656ad3e833cb", 814 | "sha256:98ce7fb5b8063cfdd86596b9c762bf2b5e35a2cdd7e967494ab78a1fa7f8b86e", 815 | "sha256:99f4a9ee60eed1385a86e82288971a51e71df052ed0b2900ed30bc840c0f2e39", 816 | "sha256:9a8e06c7a980869ea67bbf551283bbed2856915f0a792dc32dd0f9dd2fb56728", 817 | "sha256:ae8ce252404cdd4de56dcfce8b11eac3c594a9c16c231d081fb705cf23bd4d9e", 818 | "sha256:afd9c680df4de71cd58582b51e88a61feed4abcc7530bcd3d48483f20fc76f2a", 819 | "sha256:b49742cdb85f1f81e4dc1b39dcf328244f4d8d1ded95dea725b316bd2cf18c95", 820 | 
"sha256:b5613cfeb1adfe791e8e681128f5f49f22f3fcaa942255a6124d58ca59d9528f", 821 | "sha256:bab7c09454460a487e631ffc0c42057e3d8f2a9ddccd1e60c7bb8ed774992480", 822 | "sha256:c8a0e34993b510fc19b9a2ce7f31cb8e94ecf6e924a40c0c9dd4f62d0aac47d9", 823 | "sha256:caf5d284ddea7462c32b8d4a6b8af030b6c9fd5332afb70e7414d7fdded4bfd0", 824 | "sha256:cea427d1350f3fd0d2818ce7350095c1a2ee33e30961d2f0fef48576ddbbe90f", 825 | "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd", 826 | "sha256:d10c39947a2d351d6d466b4ae83dad4c37cd6c3cdd6d5d0fa797da56f710a6ae", 827 | "sha256:d2b9cd92c8f8e7b313b80e93cedc12c0112088541dcedd9197b5dee3738c1201", 828 | "sha256:d4c57b68c8ef5e1ebf47238e99bf27657511ec3f071c465f6b1bccbef12d4136", 829 | "sha256:d51fc141ddbe3f919e91a096ec739f49d686df8af254b2053ba21a910ae518bf", 830 | "sha256:e097507396c0be4e547ff15b13dc3866f45f3680f789c1a1301b07dadd3fbc78", 831 | "sha256:e30356d530528a42eeba51420ae8bf6c6c09559051887196599d96ee5f536468", 832 | "sha256:e8d5f8a8e3bc87334f025194c6193e408903d21ebaeb10952264943a985066ca", 833 | "sha256:e8dfa9e94fc127c40979c3eacbae1e61fda4fe71d84869cc129e2721973231ef", 834 | "sha256:f212d4f46b67ff604d11fff7cc62d36b3e8714edf68e44e9760e19be38c03eb0", 835 | "sha256:f7506387e191fe8cdb267f912469a3cccc538ab108471291636a96a54e599556", 836 | "sha256:fac6e277a41163d27dfab5f4ec1f7a83fac94e170665a4a50191b545721c6521", 837 | "sha256:fcd8f556cdc8cfe35e70efb92463082b7f43dd7e547eb071ffc36abc0ca4699b" 838 | ], 839 | "markers": "python_version >= '3.10'", 840 | "version": "==2.1.1" 841 | }, 842 | "overrides": { 843 | "hashes": [ 844 | "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", 845 | "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49" 846 | ], 847 | "markers": "python_version >= '3.6'", 848 | "version": "==7.7.0" 849 | }, 850 | "packaging": { 851 | "hashes": [ 852 | "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", 853 | 
"sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" 854 | ], 855 | "markers": "python_version >= '3.8'", 856 | "version": "==24.1" 857 | }, 858 | "pandocfilters": { 859 | "hashes": [ 860 | "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", 861 | "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc" 862 | ], 863 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 864 | "version": "==1.5.1" 865 | }, 866 | "parso": { 867 | "hashes": [ 868 | "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", 869 | "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d" 870 | ], 871 | "markers": "python_version >= '3.6'", 872 | "version": "==0.8.4" 873 | }, 874 | "pathspec": { 875 | "hashes": [ 876 | "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", 877 | "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" 878 | ], 879 | "markers": "python_version >= '3.8'", 880 | "version": "==0.12.1" 881 | }, 882 | "pexpect": { 883 | "hashes": [ 884 | "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", 885 | "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" 886 | ], 887 | "markers": "sys_platform != 'win32' and sys_platform != 'emscripten'", 888 | "version": "==4.9.0" 889 | }, 890 | "platformdirs": { 891 | "hashes": [ 892 | "sha256:50a5450e2e84f44539718293cbb1da0a0885c9d14adf21b77bae4e66fc99d9b5", 893 | "sha256:d4e0b7d8ec176b341fb03cb11ca12d0276faa8c485f9cd218f613840463fc2c0" 894 | ], 895 | "markers": "python_version >= '3.8'", 896 | "version": "==4.3.3" 897 | }, 898 | "pluggy": { 899 | "hashes": [ 900 | "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", 901 | "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" 902 | ], 903 | "markers": "python_version >= '3.8'", 904 | "version": "==1.5.0" 905 | }, 906 | "polars": { 907 | 
"hashes": [ 908 | "sha256:3323bf6b3f1cf55212ddd35f044af8a1aa02033bca17d06f3852325e0da93a80", 909 | "sha256:45c255749b49bee244d10baeb69057580a0a397125b014bc8854b73ba5bdf45e", 910 | "sha256:589c1b5a9b5167f3c49713212cbeccc39e3a0e12577e21331c50dbf7178e32ed", 911 | "sha256:5cd675e4a306b2da57a1b688e65382aaa9e992dd7156b485fbd7f39892a3d784", 912 | "sha256:a9004a907fc8e923dda27879f7e6eea8e06a753e160d08e606c8b9b5f914f911", 913 | "sha256:c955cca9d109ed5d79f4498915ec80590aa2e4619bc40bafbbeb5a160fcb166e" 914 | ], 915 | "index": "pypi", 916 | "markers": "python_version >= '3.8'", 917 | "version": "==1.7.1" 918 | }, 919 | "prometheus-client": { 920 | "hashes": [ 921 | "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89", 922 | "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7" 923 | ], 924 | "markers": "python_version >= '3.8'", 925 | "version": "==0.20.0" 926 | }, 927 | "prompt-toolkit": { 928 | "hashes": [ 929 | "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10", 930 | "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360" 931 | ], 932 | "markers": "python_full_version >= '3.7.0'", 933 | "version": "==3.0.47" 934 | }, 935 | "psutil": { 936 | "hashes": [ 937 | "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35", 938 | "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0", 939 | "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c", 940 | "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1", 941 | "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3", 942 | "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c", 943 | "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd", 944 | "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3", 945 | "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0", 946 | 
"sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2", 947 | "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6", 948 | "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d", 949 | "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c", 950 | "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0", 951 | "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132", 952 | "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14", 953 | "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0" 954 | ], 955 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 956 | "version": "==6.0.0" 957 | }, 958 | "ptyprocess": { 959 | "hashes": [ 960 | "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", 961 | "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" 962 | ], 963 | "markers": "os_name != 'nt'", 964 | "version": "==0.7.0" 965 | }, 966 | "pure-eval": { 967 | "hashes": [ 968 | "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", 969 | "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" 970 | ], 971 | "version": "==0.2.3" 972 | }, 973 | "pyarrow": { 974 | "hashes": [ 975 | "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", 976 | "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", 977 | "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", 978 | "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", 979 | "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", 980 | "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", 981 | "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", 982 | "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", 
983 | "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", 984 | "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", 985 | "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", 986 | "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", 987 | "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", 988 | "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", 989 | "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", 990 | "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", 991 | "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", 992 | "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", 993 | "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", 994 | "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", 995 | "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", 996 | "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", 997 | "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", 998 | "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", 999 | "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", 1000 | "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", 1001 | "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", 1002 | "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", 1003 | "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", 1004 | "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", 1005 | "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", 1006 | "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", 1007 | 
"sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", 1008 | "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", 1009 | "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", 1010 | "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8" 1011 | ], 1012 | "index": "pypi", 1013 | "markers": "python_version >= '3.8'", 1014 | "version": "==17.0.0" 1015 | }, 1016 | "pycparser": { 1017 | "hashes": [ 1018 | "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", 1019 | "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc" 1020 | ], 1021 | "markers": "python_version >= '3.8'", 1022 | "version": "==2.22" 1023 | }, 1024 | "pygments": { 1025 | "hashes": [ 1026 | "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", 1027 | "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a" 1028 | ], 1029 | "markers": "python_version >= '3.8'", 1030 | "version": "==2.18.0" 1031 | }, 1032 | "pytest": { 1033 | "hashes": [ 1034 | "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", 1035 | "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" 1036 | ], 1037 | "index": "pypi", 1038 | "markers": "python_version >= '3.8'", 1039 | "version": "==8.3.3" 1040 | }, 1041 | "python-dateutil": { 1042 | "hashes": [ 1043 | "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", 1044 | "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" 1045 | ], 1046 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 1047 | "version": "==2.9.0.post0" 1048 | }, 1049 | "python-json-logger": { 1050 | "hashes": [ 1051 | "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c", 1052 | "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd" 1053 | ], 1054 | "markers": "python_version >= '3.6'", 1055 | "version": 
"==2.0.7" 1056 | }, 1057 | "pyyaml": { 1058 | "hashes": [ 1059 | "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", 1060 | "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", 1061 | "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", 1062 | "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", 1063 | "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", 1064 | "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", 1065 | "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", 1066 | "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", 1067 | "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", 1068 | "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", 1069 | "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", 1070 | "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", 1071 | "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", 1072 | "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", 1073 | "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", 1074 | "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", 1075 | "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", 1076 | "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", 1077 | "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", 1078 | "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", 1079 | "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", 1080 | "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", 1081 | "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", 1082 | 
"sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", 1083 | "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", 1084 | "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", 1085 | "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", 1086 | "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", 1087 | "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", 1088 | "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", 1089 | "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", 1090 | "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", 1091 | "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", 1092 | "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", 1093 | "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", 1094 | "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", 1095 | "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", 1096 | "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", 1097 | "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", 1098 | "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", 1099 | "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", 1100 | "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", 1101 | "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", 1102 | "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", 1103 | "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", 1104 | "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", 1105 | "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", 1106 | 
"sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", 1107 | "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", 1108 | "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", 1109 | "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", 1110 | "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", 1111 | "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4" 1112 | ], 1113 | "markers": "python_version >= '3.8'", 1114 | "version": "==6.0.2" 1115 | }, 1116 | "pyzmq": { 1117 | "hashes": [ 1118 | "sha256:007137c9ac9ad5ea21e6ad97d3489af654381324d5d3ba614c323f60dab8fae6", 1119 | "sha256:034da5fc55d9f8da09015d368f519478a52675e558c989bfcb5cf6d4e16a7d2a", 1120 | "sha256:05590cdbc6b902101d0e65d6a4780af14dc22914cc6ab995d99b85af45362cc9", 1121 | "sha256:070672c258581c8e4f640b5159297580a9974b026043bd4ab0470be9ed324f1f", 1122 | "sha256:0aca98bc423eb7d153214b2df397c6421ba6373d3397b26c057af3c904452e37", 1123 | "sha256:0bed0e799e6120b9c32756203fb9dfe8ca2fb8467fed830c34c877e25638c3fc", 1124 | "sha256:0d987a3ae5a71c6226b203cfd298720e0086c7fe7c74f35fa8edddfbd6597eed", 1125 | "sha256:0eaa83fc4c1e271c24eaf8fb083cbccef8fde77ec8cd45f3c35a9a123e6da097", 1126 | "sha256:160c7e0a5eb178011e72892f99f918c04a131f36056d10d9c1afb223fc952c2d", 1127 | "sha256:17bf5a931c7f6618023cdacc7081f3f266aecb68ca692adac015c383a134ca52", 1128 | "sha256:17c412bad2eb9468e876f556eb4ee910e62d721d2c7a53c7fa31e643d35352e6", 1129 | "sha256:18c8dc3b7468d8b4bdf60ce9d7141897da103c7a4690157b32b60acb45e333e6", 1130 | "sha256:1a534f43bc738181aa7cbbaf48e3eca62c76453a40a746ab95d4b27b1111a7d2", 1131 | "sha256:1c17211bc037c7d88e85ed8b7d8f7e52db6dc8eca5590d162717c654550f7282", 1132 | "sha256:1f3496d76b89d9429a656293744ceca4d2ac2a10ae59b84c1da9b5165f429ad3", 1133 | "sha256:1fcc03fa4997c447dce58264e93b5aa2d57714fbe0f06c07b7785ae131512732", 1134 | 
"sha256:226af7dcb51fdb0109f0016449b357e182ea0ceb6b47dfb5999d569e5db161d5", 1135 | "sha256:23f4aad749d13698f3f7b64aad34f5fc02d6f20f05999eebc96b89b01262fb18", 1136 | "sha256:25bf2374a2a8433633c65ccb9553350d5e17e60c8eb4de4d92cc6bd60f01d306", 1137 | "sha256:28ad5233e9c3b52d76196c696e362508959741e1a005fb8fa03b51aea156088f", 1138 | "sha256:28c812d9757fe8acecc910c9ac9dafd2ce968c00f9e619db09e9f8f54c3a68a3", 1139 | "sha256:29c6a4635eef69d68a00321e12a7d2559fe2dfccfa8efae3ffb8e91cd0b36a8b", 1140 | "sha256:29c7947c594e105cb9e6c466bace8532dc1ca02d498684128b339799f5248277", 1141 | "sha256:2a50625acdc7801bc6f74698c5c583a491c61d73c6b7ea4dee3901bb99adb27a", 1142 | "sha256:2ae90ff9dad33a1cfe947d2c40cb9cb5e600d759ac4f0fd22616ce6540f72797", 1143 | "sha256:2c4a71d5d6e7b28a47a394c0471b7e77a0661e2d651e7ae91e0cab0a587859ca", 1144 | "sha256:2ea4ad4e6a12e454de05f2949d4beddb52460f3de7c8b9d5c46fbb7d7222e02c", 1145 | "sha256:2eb7735ee73ca1b0d71e0e67c3739c689067f055c764f73aac4cc8ecf958ee3f", 1146 | "sha256:31507f7b47cc1ead1f6e86927f8ebb196a0bab043f6345ce070f412a59bf87b5", 1147 | "sha256:35cffef589bcdc587d06f9149f8d5e9e8859920a071df5a2671de2213bef592a", 1148 | "sha256:367b4f689786fca726ef7a6c5ba606958b145b9340a5e4808132cc65759abd44", 1149 | "sha256:39887ac397ff35b7b775db7201095fc6310a35fdbae85bac4523f7eb3b840e20", 1150 | "sha256:3a495b30fc91db2db25120df5847d9833af237546fd59170701acd816ccc01c4", 1151 | "sha256:3b55a4229ce5da9497dd0452b914556ae58e96a4381bb6f59f1305dfd7e53fc8", 1152 | "sha256:402b190912935d3db15b03e8f7485812db350d271b284ded2b80d2e5704be780", 1153 | "sha256:43a47408ac52647dfabbc66a25b05b6a61700b5165807e3fbd40063fcaf46386", 1154 | "sha256:4661c88db4a9e0f958c8abc2b97472e23061f0bc737f6f6179d7a27024e1faa5", 1155 | "sha256:46a446c212e58456b23af260f3d9fb785054f3e3653dbf7279d8f2b5546b21c2", 1156 | "sha256:470d4a4f6d48fb34e92d768b4e8a5cc3780db0d69107abf1cd7ff734b9766eb0", 1157 | "sha256:49d34ab71db5a9c292a7644ce74190b1dd5a3475612eefb1f8be1d6961441971", 1158 | 
"sha256:4d29ab8592b6ad12ebbf92ac2ed2bedcfd1cec192d8e559e2e099f648570e19b", 1159 | "sha256:4d80b1dd99c1942f74ed608ddb38b181b87476c6a966a88a950c7dee118fdf50", 1160 | "sha256:4da04c48873a6abdd71811c5e163bd656ee1b957971db7f35140a2d573f6949c", 1161 | "sha256:4f78c88905461a9203eac9faac157a2a0dbba84a0fd09fd29315db27be40af9f", 1162 | "sha256:4ff9dc6bc1664bb9eec25cd17506ef6672d506115095411e237d571e92a58231", 1163 | "sha256:5506f06d7dc6ecf1efacb4a013b1f05071bb24b76350832c96449f4a2d95091c", 1164 | "sha256:55cf66647e49d4621a7e20c8d13511ef1fe1efbbccf670811864452487007e08", 1165 | "sha256:5a509df7d0a83a4b178d0f937ef14286659225ef4e8812e05580776c70e155d5", 1166 | "sha256:5c2b3bfd4b9689919db068ac6c9911f3fcb231c39f7dd30e3138be94896d18e6", 1167 | "sha256:6835dd60355593de10350394242b5757fbbd88b25287314316f266e24c61d073", 1168 | "sha256:689c5d781014956a4a6de61d74ba97b23547e431e9e7d64f27d4922ba96e9d6e", 1169 | "sha256:6a96179a24b14fa6428cbfc08641c779a53f8fcec43644030328f44034c7f1f4", 1170 | "sha256:6ace4f71f1900a548f48407fc9be59c6ba9d9aaf658c2eea6cf2779e72f9f317", 1171 | "sha256:6b274e0762c33c7471f1a7471d1a2085b1a35eba5cdc48d2ae319f28b6fc4de3", 1172 | "sha256:706e794564bec25819d21a41c31d4df2d48e1cc4b061e8d345d7fb4dd3e94072", 1173 | "sha256:70fc7fcf0410d16ebdda9b26cbd8bf8d803d220a7f3522e060a69a9c87bf7bad", 1174 | "sha256:7133d0a1677aec369d67dd78520d3fa96dd7f3dcec99d66c1762870e5ea1a50a", 1175 | "sha256:7445be39143a8aa4faec43b076e06944b8f9d0701b669df4af200531b21e40bb", 1176 | "sha256:76589c020680778f06b7e0b193f4b6dd66d470234a16e1df90329f5e14a171cd", 1177 | "sha256:76589f2cd6b77b5bdea4fca5992dc1c23389d68b18ccc26a53680ba2dc80ff2f", 1178 | "sha256:77eb0968da535cba0470a5165468b2cac7772cfb569977cff92e240f57e31bef", 1179 | "sha256:794a4562dcb374f7dbbfb3f51d28fb40123b5a2abadee7b4091f93054909add5", 1180 | "sha256:7ad1bc8d1b7a18497dda9600b12dc193c577beb391beae5cd2349184db40f187", 1181 | "sha256:7f98f6dfa8b8ccaf39163ce872bddacca38f6a67289116c8937a02e30bbe9711", 1182 | 
"sha256:8423c1877d72c041f2c263b1ec6e34360448decfb323fa8b94e85883043ef988", 1183 | "sha256:8685fa9c25ff00f550c1fec650430c4b71e4e48e8d852f7ddcf2e48308038640", 1184 | "sha256:878206a45202247781472a2d99df12a176fef806ca175799e1c6ad263510d57c", 1185 | "sha256:89289a5ee32ef6c439086184529ae060c741334b8970a6855ec0b6ad3ff28764", 1186 | "sha256:8ab5cad923cc95c87bffee098a27856c859bd5d0af31bd346035aa816b081fe1", 1187 | "sha256:8b435f2753621cd36e7c1762156815e21c985c72b19135dac43a7f4f31d28dd1", 1188 | "sha256:8be4700cd8bb02cc454f630dcdf7cfa99de96788b80c51b60fe2fe1dac480289", 1189 | "sha256:8c997098cc65e3208eca09303630e84d42718620e83b733d0fd69543a9cab9cb", 1190 | "sha256:8ea039387c10202ce304af74def5021e9adc6297067f3441d348d2b633e8166a", 1191 | "sha256:8f7e66c7113c684c2b3f1c83cdd3376103ee0ce4c49ff80a648643e57fb22218", 1192 | "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", 1193 | "sha256:92a78853d7280bffb93df0a4a6a2498cba10ee793cc8076ef797ef2f74d107cf", 1194 | "sha256:989d842dc06dc59feea09e58c74ca3e1678c812a4a8a2a419046d711031f69c7", 1195 | "sha256:9cb3a6460cdea8fe8194a76de8895707e61ded10ad0be97188cc8463ffa7e3a8", 1196 | "sha256:9dd8cd1aeb00775f527ec60022004d030ddc51d783d056e3e23e74e623e33726", 1197 | "sha256:9ed69074a610fad1c2fda66180e7b2edd4d31c53f2d1872bc2d1211563904cd9", 1198 | "sha256:9edda2df81daa129b25a39b86cb57dfdfe16f7ec15b42b19bfac503360d27a93", 1199 | "sha256:a2224fa4a4c2ee872886ed00a571f5e967c85e078e8e8c2530a2fb01b3309b88", 1200 | "sha256:a4f96f0d88accc3dbe4a9025f785ba830f968e21e3e2c6321ccdfc9aef755115", 1201 | "sha256:aedd5dd8692635813368e558a05266b995d3d020b23e49581ddd5bbe197a8ab6", 1202 | "sha256:aee22939bb6075e7afededabad1a56a905da0b3c4e3e0c45e75810ebe3a52672", 1203 | "sha256:b1d464cb8d72bfc1a3adc53305a63a8e0cac6bc8c5a07e8ca190ab8d3faa43c2", 1204 | "sha256:b8f86dd868d41bea9a5f873ee13bf5551c94cf6bc51baebc6f85075971fe6eea", 1205 | "sha256:bc6bee759a6bddea5db78d7dcd609397449cb2d2d6587f48f3ca613b19410cfc", 1206 | 
"sha256:bea2acdd8ea4275e1278350ced63da0b166421928276c7c8e3f9729d7402a57b", 1207 | "sha256:bfa832bfa540e5b5c27dcf5de5d82ebc431b82c453a43d141afb1e5d2de025fa", 1208 | "sha256:c0e6091b157d48cbe37bd67233318dbb53e1e6327d6fc3bb284afd585d141003", 1209 | "sha256:c3789bd5768ab5618ebf09cef6ec2b35fed88709b104351748a63045f0ff9797", 1210 | "sha256:c530e1eecd036ecc83c3407f77bb86feb79916d4a33d11394b8234f3bd35b940", 1211 | "sha256:c811cfcd6a9bf680236c40c6f617187515269ab2912f3d7e8c0174898e2519db", 1212 | "sha256:c92d73464b886931308ccc45b2744e5968cbaade0b1d6aeb40d8ab537765f5bc", 1213 | "sha256:cccba051221b916a4f5e538997c45d7d136a5646442b1231b916d0164067ea27", 1214 | "sha256:cdeabcff45d1c219636ee2e54d852262e5c2e085d6cb476d938aee8d921356b3", 1215 | "sha256:ced65e5a985398827cc9276b93ef6dfabe0273c23de8c7931339d7e141c2818e", 1216 | "sha256:d049df610ac811dcffdc147153b414147428567fbbc8be43bb8885f04db39d98", 1217 | "sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b", 1218 | "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629", 1219 | "sha256:ded0fc7d90fe93ae0b18059930086c51e640cdd3baebdc783a695c77f123dcd9", 1220 | "sha256:e3e0210287329272539eea617830a6a28161fbbd8a3271bf4150ae3e58c5d0e6", 1221 | "sha256:e6fa2e3e683f34aea77de8112f6483803c96a44fd726d7358b9888ae5bb394ec", 1222 | "sha256:ea0eb6af8a17fa272f7b98d7bebfab7836a0d62738e16ba380f440fceca2d951", 1223 | "sha256:ea7f69de383cb47522c9c208aec6dd17697db7875a4674c4af3f8cfdac0bdeae", 1224 | "sha256:eac5174677da084abf378739dbf4ad245661635f1600edd1221f150b165343f4", 1225 | "sha256:fc4f7a173a5609631bb0c42c23d12c49df3966f89f496a51d3eb0ec81f4519d6", 1226 | "sha256:fdb5b3e311d4d4b0eb8b3e8b4d1b0a512713ad7e6a68791d0923d1aec433d919" 1227 | ], 1228 | "markers": "python_version >= '3.7'", 1229 | "version": "==26.2.0" 1230 | }, 1231 | "referencing": { 1232 | "hashes": [ 1233 | "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c", 1234 | 
"sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de" 1235 | ], 1236 | "markers": "python_version >= '3.8'", 1237 | "version": "==0.35.1" 1238 | }, 1239 | "requests": { 1240 | "hashes": [ 1241 | "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", 1242 | "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" 1243 | ], 1244 | "markers": "python_version >= '3.8'", 1245 | "version": "==2.32.3" 1246 | }, 1247 | "rfc3339-validator": { 1248 | "hashes": [ 1249 | "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", 1250 | "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa" 1251 | ], 1252 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 1253 | "version": "==0.1.4" 1254 | }, 1255 | "rfc3986-validator": { 1256 | "hashes": [ 1257 | "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", 1258 | "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055" 1259 | ], 1260 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 1261 | "version": "==0.1.1" 1262 | }, 1263 | "rpds-py": { 1264 | "hashes": [ 1265 | "sha256:06db23d43f26478303e954c34c75182356ca9aa7797d22c5345b16871ab9c45c", 1266 | "sha256:0e13e6952ef264c40587d510ad676a988df19adea20444c2b295e536457bc585", 1267 | "sha256:11ef6ce74616342888b69878d45e9f779b95d4bd48b382a229fe624a409b72c5", 1268 | "sha256:1259c7b3705ac0a0bd38197565a5d603218591d3f6cee6e614e380b6ba61c6f6", 1269 | "sha256:18d7585c463087bddcfa74c2ba267339f14f2515158ac4db30b1f9cbdb62c8ef", 1270 | "sha256:1e0f80b739e5a8f54837be5d5c924483996b603d5502bfff79bf33da06164ee2", 1271 | "sha256:1e5f3cd7397c8f86c8cc72d5a791071431c108edd79872cdd96e00abd8497d29", 1272 | "sha256:220002c1b846db9afd83371d08d239fdc865e8f8c5795bbaec20916a76db3318", 1273 | "sha256:22e6c9976e38f4d8c4a63bd8a8edac5307dffd3ee7e6026d97f3cc3a2dc02a0b", 1274 | 
"sha256:238a2d5b1cad28cdc6ed15faf93a998336eb041c4e440dd7f902528b8891b399", 1275 | "sha256:2580b0c34583b85efec8c5c5ec9edf2dfe817330cc882ee972ae650e7b5ef739", 1276 | "sha256:28527c685f237c05445efec62426d285e47a58fb05ba0090a4340b73ecda6dee", 1277 | "sha256:2cf126d33a91ee6eedc7f3197b53e87a2acdac63602c0f03a02dd69e4b138174", 1278 | "sha256:338ca4539aad4ce70a656e5187a3a31c5204f261aef9f6ab50e50bcdffaf050a", 1279 | "sha256:39ed0d010457a78f54090fafb5d108501b5aa5604cc22408fc1c0c77eac14344", 1280 | "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2", 1281 | "sha256:3d2b1ad682a3dfda2a4e8ad8572f3100f95fad98cb99faf37ff0ddfe9cbf9d03", 1282 | "sha256:3d61339e9f84a3f0767b1995adfb171a0d00a1185192718a17af6e124728e0f5", 1283 | "sha256:3fde368e9140312b6e8b6c09fb9f8c8c2f00999d1823403ae90cc00480221b22", 1284 | "sha256:40ce74fc86ee4645d0a225498d091d8bc61f39b709ebef8204cb8b5a464d3c0e", 1285 | "sha256:49a8063ea4296b3a7e81a5dfb8f7b2d73f0b1c20c2af401fb0cdf22e14711a96", 1286 | "sha256:4a1f1d51eccb7e6c32ae89243cb352389228ea62f89cd80823ea7dd1b98e0b91", 1287 | "sha256:4b16aa0107ecb512b568244ef461f27697164d9a68d8b35090e9b0c1c8b27752", 1288 | "sha256:4f1ed4749a08379555cebf4650453f14452eaa9c43d0a95c49db50c18b7da075", 1289 | "sha256:4fe84294c7019456e56d93e8ababdad5a329cd25975be749c3f5f558abb48253", 1290 | "sha256:50eccbf054e62a7b2209b28dc7a22d6254860209d6753e6b78cfaeb0075d7bee", 1291 | "sha256:514b3293b64187172bc77c8fb0cdae26981618021053b30d8371c3a902d4d5ad", 1292 | "sha256:54b43a2b07db18314669092bb2de584524d1ef414588780261e31e85846c26a5", 1293 | "sha256:55fea87029cded5df854ca7e192ec7bdb7ecd1d9a3f63d5c4eb09148acf4a7ce", 1294 | "sha256:569b3ea770c2717b730b61998b6c54996adee3cef69fc28d444f3e7920313cf7", 1295 | "sha256:56e27147a5a4c2c21633ff8475d185734c0e4befd1c989b5b95a5d0db699b21b", 1296 | "sha256:57eb94a8c16ab08fef6404301c38318e2c5a32216bf5de453e2714c964c125c8", 1297 | "sha256:5a35df9f5548fd79cb2f52d27182108c3e6641a4feb0f39067911bf2adaa3e57", 1298 | 
"sha256:5a8c94dad2e45324fc74dce25e1645d4d14df9a4e54a30fa0ae8bad9a63928e3", 1299 | "sha256:5b4f105deeffa28bbcdff6c49b34e74903139afa690e35d2d9e3c2c2fba18cec", 1300 | "sha256:5c1dc0f53856b9cc9a0ccca0a7cc61d3d20a7088201c0937f3f4048c1718a209", 1301 | "sha256:614fdafe9f5f19c63ea02817fa4861c606a59a604a77c8cdef5aa01d28b97921", 1302 | "sha256:617c7357272c67696fd052811e352ac54ed1d9b49ab370261a80d3b6ce385045", 1303 | "sha256:65794e4048ee837494aea3c21a28ad5fc080994dfba5b036cf84de37f7ad5074", 1304 | "sha256:6632f2d04f15d1bd6fe0eedd3b86d9061b836ddca4c03d5cf5c7e9e6b7c14580", 1305 | "sha256:6c8ef2ebf76df43f5750b46851ed1cdf8f109d7787ca40035fe19fbdc1acc5a7", 1306 | "sha256:758406267907b3781beee0f0edfe4a179fbd97c0be2e9b1154d7f0a1279cf8e5", 1307 | "sha256:7e60cb630f674a31f0368ed32b2a6b4331b8350d67de53c0359992444b116dd3", 1308 | "sha256:89c19a494bf3ad08c1da49445cc5d13d8fefc265f48ee7e7556839acdacf69d0", 1309 | "sha256:8a86a9b96070674fc88b6f9f71a97d2c1d3e5165574615d1f9168ecba4cecb24", 1310 | "sha256:8bc7690f7caee50b04a79bf017a8d020c1f48c2a1077ffe172abec59870f1139", 1311 | "sha256:8d7919548df3f25374a1f5d01fbcd38dacab338ef5f33e044744b5c36729c8db", 1312 | "sha256:9426133526f69fcaba6e42146b4e12d6bc6c839b8b555097020e2b78ce908dcc", 1313 | "sha256:9824fb430c9cf9af743cf7aaf6707bf14323fb51ee74425c380f4c846ea70789", 1314 | "sha256:9bb4a0d90fdb03437c109a17eade42dfbf6190408f29b2744114d11586611d6f", 1315 | "sha256:9bc2d153989e3216b0559251b0c260cfd168ec78b1fac33dd485750a228db5a2", 1316 | "sha256:9d35cef91e59ebbeaa45214861874bc6f19eb35de96db73e467a8358d701a96c", 1317 | "sha256:a1862d2d7ce1674cffa6d186d53ca95c6e17ed2b06b3f4c476173565c862d232", 1318 | "sha256:a84ab91cbe7aab97f7446652d0ed37d35b68a465aeef8fc41932a9d7eee2c1a6", 1319 | "sha256:aa7f429242aae2947246587d2964fad750b79e8c233a2367f71b554e9447949c", 1320 | "sha256:aa9a0521aeca7d4941499a73ad7d4f8ffa3d1affc50b9ea11d992cd7eff18a29", 1321 | "sha256:ac2f4f7a98934c2ed6505aead07b979e6f999389f16b714448fb39bbaa86a489", 1322 | 
"sha256:ae94bd0b2f02c28e199e9bc51485d0c5601f58780636185660f86bf80c89af94", 1323 | "sha256:af0fc424a5842a11e28956e69395fbbeab2c97c42253169d87e90aac2886d751", 1324 | "sha256:b2a5db5397d82fa847e4c624b0c98fe59d2d9b7cf0ce6de09e4d2e80f8f5b3f2", 1325 | "sha256:b4c29cbbba378759ac5786730d1c3cb4ec6f8ababf5c42a9ce303dc4b3d08cda", 1326 | "sha256:b74b25f024b421d5859d156750ea9a65651793d51b76a2e9238c05c9d5f203a9", 1327 | "sha256:b7f19250ceef892adf27f0399b9e5afad019288e9be756d6919cb58892129f51", 1328 | "sha256:b80d4a7900cf6b66bb9cee5c352b2d708e29e5a37fe9bf784fa97fc11504bf6c", 1329 | "sha256:b8c00a3b1e70c1d3891f0db1b05292747f0dbcfb49c43f9244d04c70fbc40eb8", 1330 | "sha256:bb273176be34a746bdac0b0d7e4e2c467323d13640b736c4c477881a3220a989", 1331 | "sha256:c3c20f0ddeb6e29126d45f89206b8291352b8c5b44384e78a6499d68b52ae511", 1332 | "sha256:c3e130fd0ec56cb76eb49ef52faead8ff09d13f4527e9b0c400307ff72b408e1", 1333 | "sha256:c52d3f2f82b763a24ef52f5d24358553e8403ce05f893b5347098014f2d9eff2", 1334 | "sha256:c6377e647bbfd0a0b159fe557f2c6c602c159fc752fa316572f012fc0bf67150", 1335 | "sha256:c638144ce971df84650d3ed0096e2ae7af8e62ecbbb7b201c8935c370df00a2c", 1336 | "sha256:ce9845054c13696f7af7f2b353e6b4f676dab1b4b215d7fe5e05c6f8bb06f965", 1337 | "sha256:cf258ede5bc22a45c8e726b29835b9303c285ab46fc7c3a4cc770736b5304c9f", 1338 | "sha256:d0a26ffe9d4dd35e4dfdd1e71f46401cff0181c75ac174711ccff0459135fa58", 1339 | "sha256:d0b67d87bb45ed1cd020e8fbf2307d449b68abc45402fe1a4ac9e46c3c8b192b", 1340 | "sha256:d20277fd62e1b992a50c43f13fbe13277a31f8c9f70d59759c88f644d66c619f", 1341 | "sha256:d454b8749b4bd70dd0a79f428731ee263fa6995f83ccb8bada706e8d1d3ff89d", 1342 | "sha256:d4c7d1a051eeb39f5c9547e82ea27cbcc28338482242e3e0b7768033cb083821", 1343 | "sha256:d72278a30111e5b5525c1dd96120d9e958464316f55adb030433ea905866f4de", 1344 | "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121", 1345 | "sha256:d807dc2051abe041b6649681dce568f8e10668e3c1c6543ebae58f2d7e617855", 1346 | 
"sha256:dbe982f38565bb50cb7fb061ebf762c2f254ca3d8c20d4006878766e84266272", 1347 | "sha256:dcedf0b42bcb4cfff4101d7771a10532415a6106062f005ab97d1d0ab5681c60", 1348 | "sha256:deb62214c42a261cb3eb04d474f7155279c1a8a8c30ac89b7dcb1721d92c3c02", 1349 | "sha256:def7400461c3a3f26e49078302e1c1b38f6752342c77e3cf72ce91ca69fb1bc1", 1350 | "sha256:df3de6b7726b52966edf29663e57306b23ef775faf0ac01a3e9f4012a24a4140", 1351 | "sha256:e1940dae14e715e2e02dfd5b0f64a52e8374a517a1e531ad9412319dc3ac7879", 1352 | "sha256:e4df1e3b3bec320790f699890d41c59d250f6beda159ea3c44c3f5bac1976940", 1353 | "sha256:e6900ecdd50ce0facf703f7a00df12374b74bbc8ad9fe0f6559947fb20f82364", 1354 | "sha256:ea438162a9fcbee3ecf36c23e6c68237479f89f962f82dae83dc15feeceb37e4", 1355 | "sha256:eb851b7df9dda52dc1415ebee12362047ce771fc36914586b2e9fcbd7d293b3e", 1356 | "sha256:ec31a99ca63bf3cd7f1a5ac9fe95c5e2d060d3c768a09bc1d16e235840861420", 1357 | "sha256:f0475242f447cc6cb8a9dd486d68b2ef7fbee84427124c232bff5f63b1fe11e5", 1358 | "sha256:f2fbf7db2012d4876fb0d66b5b9ba6591197b0f165db8d99371d976546472a24", 1359 | "sha256:f60012a73aa396be721558caa3a6fd49b3dd0033d1675c6d59c4502e870fcf0c", 1360 | "sha256:f8e604fe73ba048c06085beaf51147eaec7df856824bfe7b98657cf436623daf", 1361 | "sha256:f90a4cd061914a60bd51c68bcb4357086991bd0bb93d8aa66a6da7701370708f", 1362 | "sha256:f918a1a130a6dfe1d7fe0f105064141342e7dd1611f2e6a21cd2f5c8cb1cfb3e", 1363 | "sha256:fa518bcd7600c584bf42e6617ee8132869e877db2f76bcdc281ec6a4113a53ab", 1364 | "sha256:faefcc78f53a88f3076b7f8be0a8f8d35133a3ecf7f3770895c25f8813460f08", 1365 | "sha256:fcaeb7b57f1a1e071ebd748984359fef83ecb026325b9d4ca847c95bc7311c92", 1366 | "sha256:fd2d84f40633bc475ef2d5490b9c19543fbf18596dcb1b291e3a12ea5d722f7a", 1367 | "sha256:fdfc3a892927458d98f3d55428ae46b921d1f7543b89382fdb483f5640daaec8" 1368 | ], 1369 | "markers": "python_version >= '3.8'", 1370 | "version": "==0.20.0" 1371 | }, 1372 | "send2trash": { 1373 | "hashes": [ 1374 | 
"sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9", 1375 | "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf" 1376 | ], 1377 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 1378 | "version": "==1.8.3" 1379 | }, 1380 | "setuptools": { 1381 | "hashes": [ 1382 | "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2", 1383 | "sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538" 1384 | ], 1385 | "markers": "python_version >= '3.8'", 1386 | "version": "==75.1.0" 1387 | }, 1388 | "six": { 1389 | "hashes": [ 1390 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 1391 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 1392 | ], 1393 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 1394 | "version": "==1.16.0" 1395 | }, 1396 | "sniffio": { 1397 | "hashes": [ 1398 | "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", 1399 | "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" 1400 | ], 1401 | "markers": "python_version >= '3.7'", 1402 | "version": "==1.3.1" 1403 | }, 1404 | "soupsieve": { 1405 | "hashes": [ 1406 | "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", 1407 | "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9" 1408 | ], 1409 | "markers": "python_version >= '3.8'", 1410 | "version": "==2.6" 1411 | }, 1412 | "stack-data": { 1413 | "hashes": [ 1414 | "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", 1415 | "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" 1416 | ], 1417 | "version": "==0.6.3" 1418 | }, 1419 | "terminado": { 1420 | "hashes": [ 1421 | "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", 1422 | "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e" 1423 | 
], 1424 | "markers": "python_version >= '3.8'", 1425 | "version": "==0.18.1" 1426 | }, 1427 | "tinycss2": { 1428 | "hashes": [ 1429 | "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d", 1430 | "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7" 1431 | ], 1432 | "markers": "python_version >= '3.8'", 1433 | "version": "==1.3.0" 1434 | }, 1435 | "toolz": { 1436 | "hashes": [ 1437 | "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85", 1438 | "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d" 1439 | ], 1440 | "markers": "python_version >= '3.7'", 1441 | "version": "==0.12.1" 1442 | }, 1443 | "tornado": { 1444 | "hashes": [ 1445 | "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8", 1446 | "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f", 1447 | "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4", 1448 | "sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3", 1449 | "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14", 1450 | "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842", 1451 | "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9", 1452 | "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698", 1453 | "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7", 1454 | "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d", 1455 | "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4" 1456 | ], 1457 | "markers": "python_version >= '3.8'", 1458 | "version": "==6.4.1" 1459 | }, 1460 | "traitlets": { 1461 | "hashes": [ 1462 | "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", 1463 | "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f" 1464 | ], 1465 | "markers": "python_version >= '3.8'", 1466 | "version": "==5.14.3" 
1467 | }, 1468 | "types-python-dateutil": { 1469 | "hashes": [ 1470 | "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6", 1471 | "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e" 1472 | ], 1473 | "markers": "python_version >= '3.8'", 1474 | "version": "==2.9.0.20240906" 1475 | }, 1476 | "uri-template": { 1477 | "hashes": [ 1478 | "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7", 1479 | "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363" 1480 | ], 1481 | "version": "==1.3.0" 1482 | }, 1483 | "urllib3": { 1484 | "hashes": [ 1485 | "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", 1486 | "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9" 1487 | ], 1488 | "markers": "python_version >= '3.8'", 1489 | "version": "==2.2.3" 1490 | }, 1491 | "wcwidth": { 1492 | "hashes": [ 1493 | "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", 1494 | "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5" 1495 | ], 1496 | "version": "==0.2.13" 1497 | }, 1498 | "webcolors": { 1499 | "hashes": [ 1500 | "sha256:08b07af286a01bcd30d583a7acadf629583d1f79bfef27dd2c2c5c263817277d", 1501 | "sha256:fc4c3b59358ada164552084a8ebee637c221e4059267d0f8325b3b560f6c7f0a" 1502 | ], 1503 | "version": "==24.8.0" 1504 | }, 1505 | "webencodings": { 1506 | "hashes": [ 1507 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", 1508 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" 1509 | ], 1510 | "version": "==0.5.1" 1511 | }, 1512 | "websocket-client": { 1513 | "hashes": [ 1514 | "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", 1515 | "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da" 1516 | ], 1517 | "markers": "python_version >= '3.8'", 1518 | "version": "==1.8.0" 1519 | } 1520 | }, 1521 | "develop": {} 1522 | } 1523 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | * [Build a data engineering project, with step-by-step instructions](#build-a-data-engineering-project-with-step-by-step-instructions) 2 | * [Data used](#data-used) 3 | * [Architecture](#architecture) 4 | * [Setup](#setup) 5 | * [Option 1: GitHub codespaces (Recommended)](#option-1-github-codespaces-recommended) 6 | * [Option 2: Run locally](#option-2-run-locally) 7 | 8 | # Build a data engineering project, with step-by-step instructions 9 | 10 | * Code for the blog: **[Build data engineering projects with step-by-step instruction](https://www.startdataengineering.com/post/de-proj-step-by-step/)** 11 | * **Live workshop link** 12 | 13 | [![Live workshop](https://img.youtube.com/vi/bfiOLwp1aWM/0.jpg)](https://www.youtube.com/live/bfiOLwp1aWM) 14 | 15 | 16 | ## Data used 17 | 18 | Let's assume we are working with a car part seller database (TPC-H). The data is available in a DuckDB database. See the data model below: 19 | 20 | ![TPCH data model](./assets/images/tpch_erd.png) 21 | 22 | We can create fake input data using the [create_input_data.py](https://github.com/josephmachado/de_project/blob/main/setup/create_input_data.py) script. 23 | 24 | ## Architecture 25 | 26 | Most data teams have their version of the 3-hop architecture. For example, dbt has its own version (stage, intermediate, mart), and Spark has the medallion (bronze, silver, gold) architecture. 27 | 28 | ![Data Flow](./assets/images/dep-arch.png) 29 | 30 | **Tools used:** 31 | 32 | 1. [Polars logo](https://pola.rs/) 33 | 2. [Docker logo](https://www.docker.com/) 34 | 3. [Apache Airflow logo](https://airflow.apache.org/) 35 | 4. [Pytest logo](https://docs.pytest.org/en/stable/) 36 | 5. 
[DuckDB logo](https://duckdb.org/) 37 | 38 | ## Setup 39 | 40 | You have two options to run the exercises in this repo: 41 | 42 | ### Option 1: Github codespaces (Recommended) 43 | 44 | Steps: 45 | 46 | 1. Create [Github codespaces with this link](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=858828036&ref=main&devcontainer_path=.devcontainer%2Fdevcontainer.json&geo=UsWest). 47 | 2. Wait for Github to install the [requirements.txt](./requirements.txt). This step can take about 5 minutes. 48 | ![installation](./assets/images/inst.png) 49 | 3. Now open the `setup-data-project.ipynb` and it will open in a Jupyter notebook interface. You will be asked for your kernel choice; choose `Python Environments` and then `python3.12.00 Global`. 50 | ![Jupyter notebook in VScode](./assets/images/vsjupy.png) 51 | 4. Work through the **[setup-data-project](./setup-data-project.ipynb)** notebook, which goes over how to create a data pipeline. 52 | 5. In the terminal, run the following commands to set up the input data, run the ETL, and run the tests. 53 | 54 | ```bash 55 | # setup input data 56 | python ./setup/create_input_data.py 57 | # run pipeline 58 | python dags/run_pipeline.py 59 | # run tests 60 | python -m pytest dags/tests/unit/test_dim_customer.py 61 | ``` 62 | 63 | ### Option 2: Run locally 64 | 65 | Steps: 66 | 67 | 1. Clone this repo, cd into the cloned repo 68 | 2. Start a virtual env and install requirements. 69 | 3. Start Jupyter lab and run the `setup-data-project.ipynb` notebook that goes over how to create a data pipeline. 70 | ```bash 71 | git clone https://github.com/josephmachado/de_project.git 72 | cd de_project 73 | rm -rf env 74 | python -m venv ./env # create a virtual env 75 | source env/bin/activate # use virtual environment 76 | pip install -r requirements.txt 77 | jupyter lab 78 | ``` 79 | 4. In the terminal, run the following commands to set up the input data, run the ETL, and run the tests.
80 | 81 | ```bash 82 | # setup input data 83 | python ./setup/create_input_data.py 84 | # run pipeline 85 | python dags/run_pipeline.py 86 | # run tests 87 | python -m pytest dags/tests/unit/test_dim_customer.py 88 | ``` 89 | 90 | -------------------------------------------------------------------------------- /answers-setup-data-project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a8f12273-d47e-4093-8c92-f7c0c03bed53", 6 | "metadata": {}, 7 | "source": [ 8 | "# 1" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "59b22131-8ac0-4222-87db-cd4c6de2110d", 14 | "metadata": {}, 15 | "source": [ 16 | "**Exercise**: What is the relationship between the `orders` table and the `customer` table?\n", 17 | "\n", 18 | "a. 1 to many\n", 19 | "\n", 20 | "b. many to 1\n", 21 | "\n", 22 | "c. many to many\n", 23 | "\n", 24 | "d. 1 to 1" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "id": "99c2771e-452d-4fef-9d18-733d636f7523", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "answer = 'd'\n", 35 | "# Check answer\n", 36 | "assert answer == 'd'" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "3fe74dd7-5fa5-4dbf-8dcc-76631ca0b453", 42 | "metadata": {}, 43 | "source": [ 44 | "# 2" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "7390fcad-3d9a-4dc5-9afd-fae240376c81", 50 | "metadata": {}, 51 | "source": [ 52 | "**Exercise**: What is the relationship between the `customer` table and the `orders` table?\n", 53 | "\n", 54 | "a. 1 to many\n", 55 | "\n", 56 | "b. many to 1\n", 57 | "\n", 58 | "c. many to many\n", 59 | "\n", 60 | "d.
1 to 1\n" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "id": "2695194a-268b-4812-9b2d-b12d49b91fe4", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "answer = 'a'\n", 71 | "# Check answer\n", 72 | "assert answer == 'a'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "291c28c0-3dca-48eb-b3eb-799ff091577c", 78 | "metadata": {}, 79 | "source": [ 80 | "# 3" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "8cd9e88e-34f5-44a6-9178-d85470f76945", 86 | "metadata": {}, 87 | "source": [ 88 | "**Exercise**: Assume that you have to create a `wide_lineitem` table with all the dimensions at its respective grain. What other tables will you left join with `fct_lineitem` table?\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "1f8e6070-3c6d-4993-b0c4-05116cc32f51", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "tables_to_left_join = ['partsupp', 'part', 'supplier']\n", 99 | "# answer\n", 100 | "assert tables_to_left_join == ['partsupp', 'part', 'supplier']" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "id": "34ab7ccb-a514-4612-98fa-625e5fb5e20f", 106 | "metadata": {}, 107 | "source": [ 108 | "# 4" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "db325f04-c8c7-441f-b58d-d9e34cc26a36", 114 | "metadata": {}, 115 | "source": [ 116 | "**Exercise:**\n", 117 | "Write code (similar to above)\n", 118 | "1. Remove the o_ and l_ from the order and lineitem table's column names\n", 119 | "2. 
We also rename customer key and order key to customer_key and order_key respectively" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "id": "6b26f740-d936-4cec-b04f-28b64a3301d8", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Answer\n", 130 | "# cleaned_orders_df = orders_df.rename(lambda col_name: col_name[2:]).rename({\"custkey\": \"customer_key\", \"orderkey\": \"order_key\"})\n", 131 | "# cleaned_lineitem_df = lineitem_df.rename(lambda col_name: col_name[2:]).rename({\"orderkey\": \"order_key\"})" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "id": "7465c312-37d3-4289-a705-94367e95ee1a", 137 | "metadata": {}, 138 | "source": [ 139 | "# 5" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "f0cf1b6e-0706-4cc8-8fed-af9aa36485be", 145 | "metadata": {}, 146 | "source": [ 147 | "**Exercise:** There is a deliberate mistake in the above cell, what is it?\n", 148 | "\n", 149 | "**Hint** Is the code actually doing what it is supposed to do?" 
150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "ce15a660-8d5e-41bd-9aaf-6e99f1ca00c1", 155 | "metadata": {}, 156 | "source": [ 157 | "**Answer**\n", 158 | "\n", 159 | "The code is inserting the `comparison_json`, but in reality it should insert the `curr_metric`." 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "1c3a3a81-a404-42c8-8f49-f280c155047f", 165 | "metadata": {}, 166 | "source": [ 167 | "# 6" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "bf7662dd-d540-4f11-bd94-a3030d324248", 173 | "metadata": {}, 174 | "source": [ 175 | "**Exercise**: How would you improve the `check_variance` method?\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "6eaf200b-b68a-47e1-bbaf-2742ce90a243", 181 | "metadata": {}, 182 | "source": [ 183 | "**Answer**\n", 184 | "\n", 185 | "The `check_variance` function does 2 things: it calculates the current metrics and then compares them with the previous run's metrics.\n", 186 | "It would enable better reusability and separation of concerns if we:\n", 187 | "\n", 188 | "1. Change `get_latest_run_metrics` to `get_run_metrics(run_id)`. This way the same function can be used for `prev_metrics` and `curr_metrics`, keeping the implementation (see the JSON conversions) in one place.\n", 189 | "2. Move the computation of the comparison out to a separate `compare_metrics(metric_dict_1, metric_dict_2)` function, which returns a comparison dict.\n", 190 | "3. Keep the threshold check in the `check_variance` function.\n", 191 | "\n", 192 | "While this may seem like overkill (especially if these functions are only used in one place), it helps with code maintainability and testing."
193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3 (ipykernel)", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.12.4" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 5 217 | } 218 | -------------------------------------------------------------------------------- /assets/images/airflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/airflow.png -------------------------------------------------------------------------------- /assets/images/cs3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/cs3.png -------------------------------------------------------------------------------- /assets/images/dagdep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/dagdep.png -------------------------------------------------------------------------------- /assets/images/data-tools.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/data-tools.png -------------------------------------------------------------------------------- /assets/images/data_flow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/data_flow.png -------------------------------------------------------------------------------- /assets/images/dbtps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/dbtps.png -------------------------------------------------------------------------------- /assets/images/dep-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/dep-arch.png -------------------------------------------------------------------------------- /assets/images/docker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/docker.png -------------------------------------------------------------------------------- /assets/images/duckdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/duckdb.png -------------------------------------------------------------------------------- /assets/images/folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmachado/de_project/a6a3e551e34bf8e23d24958ef27d6375fbea0a1b/assets/images/folder.png -------------------------------------------------------------------------------- /assets/images/inst.png: -------------------------------------------------------------------------------- 
#!/bin/bash
# Install a pinned Quarto CLI release for the current user.
#
# Downloads the release tarball, unpacks it under ~/opt, symlinks the `quarto`
# binary into ~/.local/bin and appends that directory to PATH via ~/.profile.

# Abort on the first failing command so a broken download does not silently
# produce an image without Quarto.
set -e

QUARTO_VERSION="1.5.43"
QUARTO_TARBALL="quarto-${QUARTO_VERSION}-linux-amd64.tar.gz"

# -f makes curl exit non-zero on HTTP errors instead of saving the error page.
curl -fL -o "${HOME}/${QUARTO_TARBALL}" \
    "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/${QUARTO_TARBALL}"

# -p: do not fail when the directory already exists, and create missing parents.
mkdir -p "${HOME}/opt"
tar -C "${HOME}/opt" -xvzf "${HOME}/${QUARTO_TARBALL}"

mkdir -p "${HOME}/.local/bin"
# -f: replace an existing link so the script can be re-run.
ln -sf "${HOME}/opt/quarto-${QUARTO_VERSION}/bin/quarto" "${HOME}/.local/bin/quarto"

# Single quotes keep $PATH/$HOME unexpanded until ~/.profile is sourced.
# No trailing "\n": bash's echo does not interpret it, so it would be written
# literally and corrupt the PATH entry.
( echo ""; echo 'export PATH="$PATH:$HOME/.local/bin"'; echo "" ) >> "${HOME}/.profile"
source "${HOME}/.profile"
def create_dataset(table_name="orders", source_database="tpch.db"):
    """Load the raw orders table from DuckDB and normalise its column names.

    Args:
        table_name: Name of the table to read. It is interpolated directly
            into the SQL text (identifiers cannot be bound as parameters), so
            it must come from trusted code, never from user input.
        source_database: Path of the DuckDB database file to open.

    Returns:
        A Polars DataFrame with the first two characters of every column name
        removed, and ``orderkey``/``custkey`` renamed to
        ``order_key``/``customer_key``.
    """
    # The context manager closes the connection even if the query raises;
    # the original never closed it. `.pl()` materialises the result into
    # Polars before the connection is closed.
    with duckdb.connect(source_database) as con:
        pulled_df = con.sql(f"select * from {table_name}").pl()

    # Drop the two-character column prefix, then give the key columns
    # readable names.
    return pulled_df.rename(lambda col_name: col_name[2:]).rename(
        {"orderkey": "order_key", "custkey": "customer_key"}
    )
def percentage_difference(val1, val2):
    """Return the symmetric percentage difference between two numbers.

    The difference is measured relative to the mean magnitude of the two
    values, so the result does not depend on argument order.

    Args:
        val1: First numeric value.
        val2: Second numeric value.

    Returns:
        ``0.0`` when both values are zero, ``100.0`` when exactly one of them
        is zero, otherwise ``|val1 - val2| / ((|val1| + |val2|) / 2) * 100``.
    """
    if val1 == 0 and val2 == 0:
        return 0.0
    if val1 == 0 or val2 == 0:
        return 100.0
    # Use magnitudes in the denominator: with the plain mean, opposite values
    # such as (5, -5) divide by zero and mixed-sign inputs are inflated.
    # For same-sign inputs the result is unchanged.
    mean_magnitude = (abs(val1) + abs(val2)) / 2
    return abs(val1 - val2) / mean_magnitude * 100
def check_variance(customer_outreach_metrics_df, perc_threshold=5):
    """Fail when aggregate metrics drift too far from the previous run.

    Sums ``avg_num_items_per_order`` and the integer-cast ``avg_order_value``
    over the whole frame and compares each total with the value returned by
    ``get_latest_run_metrics()``.

    Args:
        customer_outreach_metrics_df: Polars DataFrame holding the
            ``avg_num_items_per_order`` and ``avg_order_value`` columns.
        perc_threshold: Percentage difference at or above which the check
            fails.

    Returns:
        None. The check is skipped when no previous metrics are available.

    Raises:
        Exception: If any compared metric differs from the previous run by
            ``perc_threshold`` percent or more.
    """
    prev_metric = get_latest_run_metrics()
    if not prev_metric:
        # Nothing to compare against.
        return

    # The stored value may be a float or a numeric string; truncate it to an
    # int so it is comparable with the integer-cast current value. Only
    # normalise it when present: keys missing from the previous run are
    # skipped by the comparison loop rather than raising KeyError here.
    if 'sum_avg_order_value' in prev_metric:
        prev_metric['sum_avg_order_value'] = int(
            float(prev_metric['sum_avg_order_value'])
        )

    # Collapse the frame to a single row of column sums and read that row
    # back as a plain dict via JSON.
    curr_metric = json.loads(
        customer_outreach_metrics_df.select(
            pl.col("avg_num_items_per_order").alias(
                "sum_avg_num_items_per_order"
            ),
            pl.col("avg_order_value").cast(int).alias("sum_avg_order_value"),
        )
        .sum()
        .write_json()
    )[0]

    comparison = {
        key: percentage_difference(curr_metric[key], prev_metric[key])
        for key in curr_metric
        if key in prev_metric
    }

    for k, v in comparison.items():
        if v >= perc_threshold:
            # Report the configured threshold, not a hard-coded 5%.
            raise Exception(
                f"Difference for {k} is at or above the "
                f"{perc_threshold}% threshold: {v}%"
            )
def create_customer_outreach_metrics():
    """Run the bronze -> silver -> gold pipeline and return the metrics frame.

    Each layer is built by calling the ``create_dataset`` function of the
    matching ETL module. The final frame is passed through
    ``customer_outreach_metrics.validate_dataset`` before being returned.
    """
    # Bronze: raw tables.
    bronze_customer = customer.create_dataset()
    bronze_lineitem = lineitem.create_dataset()
    bronze_nation = nation.create_dataset()
    bronze_orders = orders.create_dataset()
    bronze_region = region.create_dataset()

    # Silver: dimension and fact tables.
    silver_customer = dim_customer.create_dataset(
        bronze_customer, bronze_nation, bronze_region
    )
    silver_lineitem = fct_lineitem.create_dataset(bronze_lineitem)
    silver_orders = fct_orders.create_dataset(bronze_orders)

    # Gold: wide tables feeding the pre-aggregated metrics.
    outreach_metrics = customer_outreach_metrics.create_dataset(
        wide_lineitem.create_dataset(silver_lineitem),
        wide_orders.create_dataset(silver_orders, silver_customer),
    )

    # Data quality checks; these raise on failure.
    customer_outreach_metrics.validate_dataset(outreach_metrics)
    return outreach_metrics
@pytest.fixture
def cleaned_customer_df():
    """Two-row customer frame used as the left side of the joins under test."""
    customer_columns = {
        "custkey": [1, 2],
        "name": ["Customer1", "Customer2"],
        "nationkey": [101, 102],
        "regionkey": [201, 202],
    }
    return pl.DataFrame(customer_columns)
from datetime import datetime, timedelta

from run_pipeline import create_customer_outreach_metrics

from airflow import DAG
from airflow.decorators import task
# EmptyOperator replaces the deprecated airflow.operators.dummy.DummyOperator.
from airflow.operators.empty import EmptyOperator

# Demo DAG: runs the customer outreach metrics pipeline once a minute.
with DAG(
    'tpch_etl',
    description='Demo DAG',
    # `schedule` supersedes the deprecated `schedule_interval` argument.
    schedule=timedelta(minutes=1),
    start_date=datetime(2024, 9, 23),
    catchup=False,
) as dag:

    @task
    def create_customer_outreach_metrics_task():
        # The pipeline's return value is deliberately not returned here, so
        # nothing is handed back to Airflow as the task result.
        create_customer_outreach_metrics()

    # No-op marker task for the end of the pipeline.
    stop_pipeline = EmptyOperator(task_id='stop_pipeline')

    create_customer_outreach_metrics_task() >> stop_pipeline
def insert_run_metrics(curr_metrics, db_path="metadata.db"):
    """Persist the metrics of the current run in the ``run_metadata`` table.

    Args:
        curr_metrics: JSON-serialisable object (e.g. a dict of metric name to
            value) describing the current run.
        db_path: Path of the SQLite database file. Defaults to the
            ``metadata.db`` file in the working directory.

    Raises:
        sqlite3.Error: If the insert fails, for example when the
            ``run_metadata`` table does not exist.
        TypeError: If ``curr_metrics`` cannot be serialised to JSON.
    """
    # Serialise before opening the connection so a bad payload never leaves
    # a connection behind.
    curr_metrics_json = json.dumps(curr_metrics)

    # run_id has minute resolution.
    # NOTE(review): two runs within the same minute share a run_id — confirm
    # the table schema tolerates that.
    current_timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M')

    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            """
            INSERT INTO run_metadata (run_id, metadata)
            VALUES (?, ?)
            """,
            (current_timestamp, curr_metrics_json),
        )
        conn.commit()
    finally:
        # Always release the connection, even when the insert raises.
        conn.close()
postgres: 30 | condition: service_healthy 31 | 32 | services: 33 | postgres: 34 | container_name: postgres 35 | image: postgres:16 36 | environment: 37 | POSTGRES_USER: airflow 38 | POSTGRES_PASSWORD: airflow 39 | POSTGRES_DB: airflow 40 | healthcheck: 41 | test: [ "CMD", "pg_isready", "-U", "airflow" ] 42 | interval: 5s 43 | retries: 5 44 | restart: always 45 | ports: 46 | - "5432:5432" 47 | 48 | airflow-webserver: 49 | <<: *airflow-common 50 | container_name: webserver 51 | command: webserver 52 | ports: 53 | - 8080:8080 54 | healthcheck: 55 | test: 56 | [ 57 | "CMD", 58 | "curl", 59 | "--fail", 60 | "http://localhost:8080/health" 61 | ] 62 | interval: 10s 63 | timeout: 10s 64 | retries: 5 65 | restart: always 66 | 67 | airflow-scheduler: 68 | <<: *airflow-common 69 | container_name: scheduler 70 | command: scheduler 71 | healthcheck: 72 | test: 73 | [ 74 | "CMD-SHELL", 75 | 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"' 76 | ] 77 | interval: 10s 78 | timeout: 10s 79 | retries: 5 80 | restart: always 81 | 82 | airflow-init: 83 | <<: *airflow-common 84 | command: version 85 | environment: 86 | <<: *airflow-common-env 87 | _AIRFLOW_DB_UPGRADE: 'true' 88 | _AIRFLOW_WWW_USER_CREATE: 'true' 89 | _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} 90 | _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} 91 | -------------------------------------------------------------------------------- /logs/scheduler/latest: -------------------------------------------------------------------------------- 1 | 2024-12-23 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | polars==1.7.1 2 | pytest==8.3.3 3 | jupyterlab==4.2.5 4 | duckdb==1.1.0 5 | pyarrow==17.0.0 6 | black==24.8.0 7 | -------------------------------------------------------------------------------- /setup-data-project.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8088c83a-f802-43c6-830c-736c57e51585", 6 | "metadata": {}, 7 | "source": [ 8 | "# Setup\n", 9 | "\n", 10 | "Make sure to follow the **[setup instructions here before starting](https://github.com/josephmachado/de_project?tab=readme-ov-file#option-1-github-codespaces-recommended)**." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "2839c497-a703-47e8-b875-f1dca2dcbeb5", 16 | "metadata": {}, 17 | "source": [ 18 | "# Note: The recommended readings mentioned in this workshop(& more) will be covered in detail as part of my Data Engineering Hands-on Workshop, **[sign up here](https://astounding-architect-5764.ck.page/684e1f422f)**" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "c599d856-e7e3-4689-aab8-6f9c34468d10", 24 | "metadata": {}, 25 | "source": [ 26 | "# Live Workshop Link:\n", 27 | "\n", 28 | "[![Live workshop](https://img.youtube.com/vi/bfiOLwp1aWM/0.jpg)](https://www.youtube.com/live/bfiOLwp1aWM)\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "9f6d7cf4-f2dd-4004-b247-75907a5511d0", 34 | "metadata": { 35 | "jp-MarkdownHeadingCollapsed": true 36 | }, 37 | "source": [ 38 | "# Introduction\n", 39 | "\n", 40 | "There are a lot of data projects available on the web (e.g., **[my list of data eng projects](https://www.startdataengineering.com/post/data-engineering-projects/)**). While these projects are great, starting from scratch to build your data project can be challenging. If you are \n", 41 | "\n", 42 | "> Wondering how to go from an idea to a production-ready data pipeline\n", 43 | "\n", 44 | "> Feeling overwhelmed by how all the parts of a data system fit together\n", 45 | "\n", 46 | "> Unsure that the pipelines you build are up to industry-standard\n", 47 | "\n", 48 | "If so, this post is for you! 
In it, we will go over how to build a data project step-by-step from scratch.\n", 49 | "\n", 50 | "By the end of this post, you will be able to quickly create data projects for any use case and see how the different parts of data systems work together. \n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "2603c6cf-c3e5-4103-bf08-9b079a1d325a", 56 | "metadata": {}, 57 | "source": [ 58 | "# Parts of data project\n", 59 | "\n", 60 | "Most data engineering tools fall into one of the parts shown below (as explained in this [post](https://www.startdataengineering.com/post/parts-of-dataengineering/))\n", 61 | "\n", 62 | "![Data tools](./assets/images/data-tools.png)\n", 63 | "\n", 64 | "In this post, we will review the parts of a data project and select tools to build a data pipeline. While we chose TPCH data for this project, anyone can choose any data set they find interesting and follow the below steps to quickly build their data pipeline.\n", 65 | "\n", 66 | "### **Recommended reading**: **[What are the key parts of data engineering](https://www.startdataengineering.com/post/parts-of-dataengineering/)**" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "02ad6b23-067c-47d5-b8de-01b8a21e887c", 72 | "metadata": {}, 73 | "source": [ 74 | "# Requirements\n", 75 | "\n", 76 | "The first step before you start should be defining precise requirements. Please work with the end users to define them (or define them yourself for side projects).\n", 77 | "\n", 78 | "We will go over a few key requirements below. 
\n", 79 | "\n", 80 | "### **Recommended reading**: **[this post that goes over how to gather requirements for data projects in detail!](https://www.startdataengineering.com/post/n-questions-data-pipeline-req/)**.\n", 81 | "\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "id": "df1218fd-cae6-49e5-b1e4-2b8ac4eceb70", 87 | "metadata": {}, 88 | "source": [ 89 | "## Understand input datasets available\n", 90 | "\n", 91 | "Let's assume we are working with a car part seller database (tpch). The data is available in a duckdb database. See the data model below:\n", 92 | "\n", 93 | "![TPCH data model](./assets/images/tpch_erd.png)\n", 94 | "\n", 95 | "We can create fake input data using the [create_input_data.py](https://github.com/josephmachado/de_project/blob/main/setup/create_input_data.py) as shown below:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "id": "196b6e1d-684a-4f1f-b048-7f9421e9ad55", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Cleaning up tpch and metadata db files\n", 109 | "Creating TPCH input data\n", 110 | "Creating metadata table\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "! python ./setup/create_input_data.py" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "id": "f6ed8600-727f-4a94-8f13-74d2210657c4", 121 | "metadata": {}, 122 | "source": [ 123 | "TPCH data is well modeled, and because of this it's easy to work with. However, this is not always the case; in most real-life projects you'd need to \n", 124 | "\n", 125 | "1. Identify the grain (aka what one row in a table corresponds to) of the input data. At times there may be tables with multiple grains.\n", 126 | "2. Identify the business process that generates the data. This will dictate how you can actually extract input datasets. 
For this you'll need to create **[conceptual & logical data models](https://www.thoughtspot.com/data-trends/data-modeling/conceptual-vs-logical-vs-physical-data-models)**." 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "8760073a-874c-4e50-9840-b2e785f9fc15", 132 | "metadata": {}, 133 | "source": [ 134 | "**Exercise**: What is the relationship between the `orders` table and the `customer` table?\n", 135 | "\n", 136 | "a. 1 to many\n", 137 | "\n", 138 | "b. many to 1\n", 139 | "\n", 140 | "c. many to many\n", 141 | "\n", 142 | "d. 1 to 1" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 11, 148 | "id": "61c88979-1fb7-43e3-b4b4-75da641be319", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# TODO: Write your choice (a, b, c, d) here\n", 153 | "answer = 'None'" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "id": "b7ce2a1c-8e62-4397-b18d-a4821ba82b58", 159 | "metadata": {}, 160 | "source": [ 161 | "**Exercise**: What is the relationship between the `customer` table and the `orders` table?\n", 162 | "\n", 163 | "a. 1 to many\n", 164 | "\n", 165 | "b. many to 1\n", 166 | "\n", 167 | "c. many to many\n", 168 | "\n", 169 | "d. 1 to 1\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 12, 175 | "id": "884f698e-cba0-4968-94d8-056d731d107c", 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "# TODO: Write your choice (a, b, c, d) here\n", 180 | "answer = 'None'" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "id": "792878dc-081c-4d8c-9fcc-12e6b744891a", 186 | "metadata": {}, 187 | "source": [ 188 | "## Define what the output dataset will look like\n", 189 | "\n", 190 | "Let's assume that the `customer` team has asked us to create a dataset that they will use for outreach (think cold emails, calls, etc.). 
\n", 191 | "\n", 192 | "Upon discussion with the `customer` team, you discover that the output dataset requires the following columns:\n", 193 | "\n", 194 | "For each customer (i.e., one row per customer)\n", 195 | "\n", 196 | "1. **customer_key**: The unique identifier for the customer \n", 197 | "2. **customer_name**: The customer name\n", 198 | "3. **min_order_value**: The value of the order with the lowest value placed by this customer\n", 199 | "4. **max_order_value**: The value of the order with the highest value placed by this customer\n", 200 | "5. **avg_order_value**: The average value of all the orders placed by this customer \n", 201 | "6. **avg_num_items_per_order**: The average number of items per order placed by this customer\n", 202 | "\n", 203 | "Let's write a simple query to see how we can get this (note that this process will take much longer with badly modeled input data)." 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 13, 209 | "id": "29118f12-3b06-41b3-952a-240fa7fa8408", 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | "┌──────────────┬────────────────────┬─────────────────┬─────────────────┬────────────────────┬─────────────────────────┐\n", 216 | "│ customer_key │ customer_name │ min_order_value │ max_order_value │ avg_order_value │ avg_num_items_per_order │\n", 217 | "│ int64 │ varchar │ decimal(15,2) │ decimal(15,2) │ double │ double │\n", 218 | "├──────────────┼────────────────────┼─────────────────┼─────────────────┼────────────────────┼─────────────────────────┤\n", 219 | "│ 392 │ Customer#000000392 │ 1292.21 │ 271885.66 │ 108024.06785714286 │ 3.357142857142857 │\n", 220 | "│ 1301 │ Customer#000001301 │ 50022.91 │ 241494.43 │ 146510.135 │ 4.0 │\n", 221 | "│ 644 │ Customer#000000644 │ 42583.07 │ 289297.34 │ 150796.15375 │ 4.75 │\n", 222 | "│ 1471 │ Customer#000001471 │ 46234.48 │ 322828.75 │ 157227.18833333332 │ 4.458333333333333 │\n", 223 | "│ 649 │ 
Customer#000000649 │ 7076.04 │ 306172.25 │ 137233.325 │ 3.769230769230769 │\n", 224 | "│ 1118 │ Customer#000001118 │ 23657.43 │ 304032.02 │ 129655.616875 │ 3.875 │\n", 225 | "│ 910 │ Customer#000000910 │ 18356.56 │ 301984.03 │ 117390.56466666667 │ 3.066666666666667 │\n", 226 | "│ 433 │ Customer#000000433 │ 6979.69 │ 261813.62 │ 112029.48227272727 │ 3.409090909090909 │\n", 227 | "│ 223 │ Customer#000000223 │ 16413.91 │ 320004.89 │ 138502.89851851852 │ 3.6666666666666665 │\n", 228 | "│ 190 │ Customer#000000190 │ 23710.07 │ 303598.07 │ 150815.1305882353 │ 4.411764705882353 │\n", 229 | "│ · │ · │ · │ · │ · │ · │\n", 230 | "│ · │ · │ · │ · │ · │ · │\n", 231 | "│ · │ · │ · │ · │ · │ · │\n", 232 | "│ 278 │ Customer#000000278 │ 4475.34 │ 260264.60 │ 118369.94642857143 │ 4.0 │\n", 233 | "│ 1025 │ Customer#000001025 │ 2496.87 │ 372916.06 │ 148970.80444444445 │ 3.888888888888889 │\n", 234 | "│ 1466 │ Customer#000001466 │ 11774.01 │ 331745.34 │ 122943.17 │ 3.75 │\n", 235 | "│ 1115 │ Customer#000001115 │ 15203.11 │ 285556.50 │ 149029.482 │ 3.9 │\n", 236 | "│ 716 │ Customer#000000716 │ 34415.23 │ 107050.72 │ 70626.275 │ 2.75 │\n", 237 | "│ 70 │ Customer#000000070 │ 25899.97 │ 299230.95 │ 145769.858 │ 4.0 │\n", 238 | "│ 1043 │ Customer#000001043 │ 72320.70 │ 272351.23 │ 165986.5927272727 │ 5.0 │\n", 239 | "│ 1237 │ Customer#000001237 │ 26434.21 │ 347357.18 │ 146211.93625 │ 4.1875 │\n", 240 | "│ 652 │ Customer#000000652 │ 1733.41 │ 317103.71 │ 138943.87875 │ 4.291666666666667 │\n", 241 | "│ 938 │ Customer#000000938 │ 25312.01 │ 295762.23 │ 154128.94222222222 │ 4.0 │\n", 242 | "├──────────────┴────────────────────┴─────────────────┴─────────────────┴────────────────────┴─────────────────────────┤\n", 243 | "│ 1000 rows (20 shown) 6 columns │\n", 244 | "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" 245 | ] 246 | }, 247 | "execution_count": 13, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | 
} 251 | ], 252 | "source": [ 253 | "# simple query to get the output dataset\n", 254 | "import duckdb\n", 255 | "con = duckdb.connect(\"tpch.db\")\n", 256 | "con.sql(\"\"\"\n", 257 | "WITH order_items AS (\n", 258 | " SELECT\n", 259 | " l_orderkey,\n", 260 | " COUNT(*) AS item_count\n", 261 | " FROM\n", 262 | " lineitem\n", 263 | " GROUP BY\n", 264 | " l_orderkey\n", 265 | "),\n", 266 | "customer_orders AS (\n", 267 | " SELECT\n", 268 | " o.o_custkey,\n", 269 | " o.o_orderkey,\n", 270 | " o.o_totalprice,\n", 271 | " oi.item_count\n", 272 | " FROM\n", 273 | " orders o\n", 274 | " JOIN\n", 275 | " order_items oi ON o.o_orderkey = oi.l_orderkey\n", 276 | ")\n", 277 | "SELECT\n", 278 | " c.c_custkey AS customer_key,\n", 279 | " c.c_name AS customer_name,\n", 280 | " MIN(co.o_totalprice) AS min_order_value,\n", 281 | " MAX(co.o_totalprice) AS max_order_value,\n", 282 | " AVG(co.o_totalprice) AS avg_order_value,\n", 283 | " AVG(co.item_count) AS avg_num_items_per_order\n", 284 | "FROM\n", 285 | " customer c\n", 286 | "JOIN\n", 287 | " customer_orders co ON c.c_custkey = co.o_custkey\n", 288 | "GROUP BY\n", 289 | " c.c_custkey, c.c_name;\n", 290 | "\"\"\")" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 14, 296 | "id": "e66b874f-69b6-473c-be4c-ff0ab0e9bb6e", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "con.commit()\n", 301 | "con.close() " 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "id": "b8063abf-6654-4f8b-b038-b3efdf05dcb8", 307 | "metadata": {}, 308 | "source": [ 309 | "## Define SLAs so stakeholders know what to expect\n", 310 | "\n", 311 | "SLAs stand for service level agreement. SLAs define what end-users can expect from your service(data pipeline, in our case). While there are multiple ways to define SLAs, the common ones for data systems are:\n", 312 | "\n", 313 | "1. `Data freshness`\n", 314 | "2. 
`Data accuracy`\n", 315 | "\n", 316 | "Let's assume that our stakeholders require the data to be no older than 12 hours. This means that your pipeline should run completely at least once every 12 hours. If we assume that the pipeline runs in 2 hours, we need to ensure that it is run at least every 10 hours so that the data is not older than 12 hours at any given time.\n", 317 | "\n", 318 | "For data accuracy, we should define what accurate data is. Let's define accuracy in the following section." 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "id": "c4f25f7b-b3e6-4bda-ba4b-d1d98aae2b34", 324 | "metadata": {}, 325 | "source": [ 326 | "## Define checks to ensure the output dataset is usable\n", 327 | "\n", 328 | "We need to ensure that the data we produce is good enough for end-users to use. Typically, the data team works with end users to identify the critical metrics to check. \n", 329 | "\n", 330 | "Let's assume we have the following checks to ensure that the output dataset is accurate:\n", 331 | "\n", 332 | "1. **customer_key**: Has to be unique and not null\n", 333 | "2. **avg_***: columns should not differ by more than 5% compared to prior runs (across all customers)\n", 334 | "\n", 335 | "### **Recommended reading**: **[Types of data quality checks](https://www.startdataengineering.com/post/types-of-dq-checks/)** & **[Implementing data quality checks with Great Expectations](https://www.startdataengineering.com/post/implement_data_quality_with_great_expectations/)**\n", 336 | "\n" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "id": "afe33cbb-a2ff-49f2-bc3c-c5368da9bb02", 342 | "metadata": {}, 343 | "source": [ 344 | "**Exercise** What other (1 main) check can you think of for the output dataset?" 
345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "id": "e6d595d0-fd16-4766-b712-907648a36579", 350 | "metadata": {}, 351 | "source": [ 352 | "# Identify what tool to use to process data\n", 353 | "\n", 354 | "We have a plethora of tools to process data, including Apache Spark, Snowflake, Python, Polars, and DuckDB. We will use Polars to process our data because the data is small. The Polars library is easy to install and use.\n", 355 | "\n", 356 | "### **Recommended reading**: **[Choosing tools for your data project](https://www.startdataengineering.com/post/choose-tools-dp/#41-requirement-x-component-framework)**" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "id": "32e82ab1-b56f-4a4a-a100-bace58dfb9c4", 362 | "metadata": {}, 363 | "source": [ 364 | "# Data flow architecture\n", 365 | "\n", 366 | "Most data teams have their version of the 3-hop architecture. For example, dbt has its own version (stage, intermediate, mart), and Spark has medallion (bronze, silver, gold) architecture.\n", 367 | "\n", 368 | "You may be wondering why we need this data flow architecture when we can get **[the results easily with a simple query shown here](./setup-data-project.ipynb#Columns-and-metics-needed-in-the-dataset-produced)**. \n", 369 | "\n", 370 | "While this is a simple example, in most real-world projects you want to have a standard, cleaned and modelled dataset (bronze and silver) that can be used to create specialized datasets for end-users (gold). 
See below for how our data will flow:\n", 371 | "\n", 372 | "![Data Flow](./assets/images/dep-arch.png)\n", 373 | "\n", 374 | "### **Recommended reading**: **[Multi-hop architecture](https://www.startdataengineering.com/post/de_best_practices/#31-use-standard-patterns-that-progressively-transform-your-data)** " 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "id": "aa70beaa-6dbc-412e-b468-96d72ca18622", 380 | "metadata": {}, 381 | "source": [ 382 | "## Bronze: Extract raw data and confine it to standard names and data types \n", 383 | "\n", 384 | "Since our dataset has data from customer, nation, region, order, and lineitem input datasets, we will bring those data into bronze tables. We will keep their names the same as the input datasets.\n", 385 | "\n", 386 | "Let's explore the input datasets and create our bronze datasets." 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 15, 392 | "id": "08122bcc-e005-4139-8a36-b854ea30c16c", 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "# read customer, order, and lineitem dataset from duckdb into Polars dataframe\n", 397 | "import duckdb\n", 398 | "import polars as pl\n", 399 | "\n", 400 | "con = duckdb.connect(\"tpch.db\")\n", 401 | "customer_df = con.sql(\"select * from customer\").pl()\n", 402 | "orders_df = con.sql(\"select * from orders\").pl()\n", 403 | "lineitem_df = con.sql(\"select * from lineitem\").pl()\n", 404 | "nation_df = con.sql(\"select * from nation\").pl()\n", 405 | "region_df = con.sql(\"select * from region\").pl()\n", 406 | "\n", 407 | "con.close() #close DuckDB connection\n", 408 | "\n", 409 | "# remove c_ and then rename custkey to customer_key\n", 410 | "cleaned_customer_df = customer_df.rename(lambda col_name: col_name[2:]).rename({\"custkey\": \"customer_key\"})\n", 411 | "\n", 412 | "# remove the n_ and r_ from the nation and region table's column names\n", 413 | "cleaned_nation_df = nation_df.rename(lambda col_name: col_name[2:])\n", 
414 | "cleaned_region_df = region_df.rename(lambda col_name: col_name[2:])\n" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "id": "69049d33-13ce-4f68-a146-91011e449244", 420 | "metadata": {}, 421 | "source": [ 422 | "**Exercise:**\n", 423 | "Write code (similar to above)\n", 424 | "1. Remove the o_ and l_ from the order and lineitem table's column names\n", 425 | "2. We also rename customer key and order key to customer_key and order_key respectively" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 16, 431 | "id": "6c21fc91-8d15-4db3-8692-fce9667541e3", 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "# your answer" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "id": "fc7c521a-e814-44da-9fc0-6b663ee4762e", 441 | "metadata": {}, 442 | "source": [ 443 | "## Silver: Model data for analytics\n", 444 | "\n", 445 | "In the silver layer, the datasets are modeled using one of the popular styles (e.g., Kimball, Data Vault, etc.). We will use Kimball's dimensional model, as it is the most commonly used one and can account for many use cases.\n", 446 | "\n", 447 | "### Data modeling\n", 448 | "\n", 449 | "We will create the following datasets\n", 450 | "\n", 451 | "1. **dim_customer**: A customer level table with all the necessary attributes of a customer. We will join nation and region data to the `cleaned_customer_df` to get all the attributes associated with a customer.\n", 452 | "2. **fct_orders**: An order level fact(an event that happened) table. This will be the same as `cleaned_orders_df` since the `orders` table has all the necessary details about the order and how it associates with dimension tables like `customer_key`.\n", 453 | "3. **fct_lineitem**: A lineitem (items that are part of an order) fact table. 
This table will be the same as `cleaned_lineitem_df` since the `lineitem` table has all the lineitem level details and keys to associate with dimension tables like `partkey` and `suppkey`.\n", 454 | "\n", 455 | "### **Recommended reading**: **[Data warehouse overview](https://www.startdataengineering.com/post/what-is-a-data-warehouse/#3-what-is-a-data-warehouse)**" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 18, 461 | "id": "640ea92d-31e3-4878-90aa-24eafb2d193d", 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "# create customer dimension by left-joining all the necessary data\n", 466 | "dim_customer = cleaned_customer_df\\\n", 467 | ".join(cleaned_nation_df, on=\"nationkey\", how=\"left\", suffix=\"_nation\")\\\n", 468 | ".join(cleaned_region_df, on=\"regionkey\", how=\"left\", suffix=\"_region\")\\\n", 469 | ".rename({\n", 470 | " \"name_nation\": \"nation_name\",\n", 471 | " \"name_region\": \"region_name\",\n", 472 | " \"comment_nation\": \"nation_comment\",\n", 473 | " \"comment_region\": \"region_comment\"\n", 474 | "})\n", 475 | "\n", 476 | "# Most fact tables are direct data from the app\n", 477 | "fct_orders = cleaned_orders_df\n", 478 | "fct_lineitem = cleaned_lineitem_df\n" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "id": "04941142-588b-443e-874c-ed9acf153276", 484 | "metadata": {}, 485 | "source": [ 486 | "## Gold: Create tables for end-users\n", 487 | "\n", 488 | "The gold layer contains datasets required for the end user. The user-required datasets are fact tables joined with dimension tables aggregated to the necessary grain. In real-world projects, multiple teams/users ask for datasets with differing grains from the same underlying fact and dimension tables. 
While you can join the necessary tables and aggregate them individually for each ask, it leads to repeated code and joins.\n", 489 | "\n", 490 | "To avoid this issue, companies typically do the following:\n", 491 | "\n", 492 | "1. **OBT**: This is a fact table with multiple dimension tables left joined with it.\n", 493 | "2. **pre-aggregated table**: The OBT table rolled up to the end user/team requested grain. The pre-aggregated dataset will be the dataset that the end user accesses. By providing the end user with the exact columns they need, we can ensure that all the metrics are in one place and issues due to incorrect metric calculations by end users are significantly reduced. These tables act as our end-users SOT (source of truth). " 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "id": "01bb2a65-8fe5-4b14-969f-03cbc9317477", 499 | "metadata": {}, 500 | "source": [ 501 | "### OBT: Join the fact table with all its dimensions\n", 502 | "\n", 503 | "In our example, we have two fact tables, `fct_orders` and `fct_lineitem`. Since we only have one dimension, `dim_customer,` we can join `fct_orders` and `dim_customer` to create `wide_orders`. For our use case, we can keep `fct_lineitem` as `wide_lineitem`.\n", 504 | "\n", 505 | "That said, we can easily see a case where we might need to join `parts` and `supplier` data with `fct_lineitem` to get `wide_lineitem`. 
But since our use case doesn't require this, we can skip it!\n", 506 | "\n", 507 | "Let's create our OBT tables" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 19, 513 | "id": "8e7bbb84-9c84-48df-a653-09d13a38cbda", 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "# create wide_orders table\n", 518 | "wide_orders = fct_orders.join(dim_customer, on=\"customer_key\", how=\"left\")\n", 519 | "\n", 520 | "# For our use case, we don't need more information at a lineitem level\n", 521 | "wide_lineitem = fct_lineitem" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "id": "ef0d3618-ca24-4f07-862c-324d86e423d3", 527 | "metadata": {}, 528 | "source": [ 529 | "**Exercise**: Assume that you have to create a `wide_lineitem` table with all the dimensions at its respective grain. What other tables will you left join with `fct_lineitem` table?\n" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 20, 535 | "id": "d562fe8a-e306-4905-8d29-dca2903d057b", 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "# your answer here, all lower case, use tables names from the data model\n", 540 | "tables_to_left_join = []" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "id": "3a11e605-96ba-48c2-a1f8-0a8227cff3f3", 546 | "metadata": {}, 547 | "source": [ 548 | "### Pre-aggregated tables: Aggregate OBTs to stakeholder-specific grain\n", 549 | "\n", 550 | "According to our **[data requirements](./setup-data-project.ipynb#Columns-and-metics-needed-in-the-dataset-produced)**, we need data from customer, orders, and lineitem. 
Since we already have customer and order data in `wide_orders`, we can join that with `wide_lineitem` to get the necessary data.\n", 551 | "\n", 552 | "We can call the final dataset `customer_outreach_metrics` (read **[this article that discusses the importance of naming](https://docs.getdbt.com/blog/on-the-importance-of-naming)**).\n", 553 | "\n", 554 | "Let's create our final dataset in Python " 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 21, 560 | "id": "9ea6cd9c-7556-4b67-af6d-068808222f25", 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [ 564 | "# create customer_outreach_metrics\n", 565 | "\n", 566 | "# get the number of lineitems per order\n", 567 | "order_lineitem_metrics = wide_lineitem.group_by(pl.col(\"order_key\")).agg(pl.col(\"linenumber\").count().alias(\"num_lineitems\"))\n", 568 | "# join the above df with wide_orders and group by customer key in wide orders to get avg, min, max order value & avg num items per order\n", 569 | "customer_outreach_metrics = wide_orders\\\n", 570 | ".join(order_lineitem_metrics, on=\"order_key\", how=\"left\")\\\n", 571 | ".group_by(\n", 572 | " pl.col(\"customer_key\"), \n", 573 | " pl.col(\"name\").alias(\"customer_name\"))\\\n", 574 | ".agg(\n", 575 | " pl.min(\"totalprice\").alias(\"min_order_value\"),\n", 576 | " pl.max(\"totalprice\").alias(\"max_order_value\"),\n", 577 | " pl.mean(\"totalprice\").alias(\"avg_order_value\"),\n", 578 | " pl.mean(\"num_lineitems\").alias(\"avg_num_items_per_order\"),\n", 579 | ")" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 22, 585 | "id": "a649f4c9-9070-4a13-85bd-507f2951c5ae", 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/html": [ 591 | "
\n", 598 | "shape: (2, 6)
customer_keycustomer_namemin_order_valuemax_order_valueavg_order_valueavg_num_items_per_order
i64strdecimal[15,2]decimal[15,2]decimal[15,2]f64
1478"Customer#000001478"45943.93320623.44null4.6
416"Customer#000000416"53224.01279221.38null4.714286
" 599 | ], 600 | "text/plain": [ 601 | "shape: (2, 6)\n", 602 | "┌──────────────┬────────────────┬────────────────┬────────────────┬────────────────┬───────────────┐\n", 603 | "│ customer_key ┆ customer_name ┆ min_order_valu ┆ max_order_valu ┆ avg_order_valu ┆ avg_num_items │\n", 604 | "│ --- ┆ --- ┆ e ┆ e ┆ e ┆ _per_order │\n", 605 | "│ i64 ┆ str ┆ --- ┆ --- ┆ --- ┆ --- │\n", 606 | "│ ┆ ┆ decimal[15,2] ┆ decimal[15,2] ┆ decimal[15,2] ┆ f64 │\n", 607 | "╞══════════════╪════════════════╪════════════════╪════════════════╪════════════════╪═══════════════╡\n", 608 | "│ 1478 ┆ Customer#00000 ┆ 45943.93 ┆ 320623.44 ┆ null ┆ 4.6 │\n", 609 | "│ ┆ 1478 ┆ ┆ ┆ ┆ │\n", 610 | "│ 416 ┆ Customer#00000 ┆ 53224.01 ┆ 279221.38 ┆ null ┆ 4.714286 │\n", 611 | "│ ┆ 0416 ┆ ┆ ┆ ┆ │\n", 612 | "└──────────────┴────────────────┴────────────────┴────────────────┴────────────────┴───────────────┘" 613 | ] 614 | }, 615 | "execution_count": 22, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "customer_outreach_metrics.limit(2)" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "id": "2afa41ba-26b5-43a1-8145-3cff8352239c", 627 | "metadata": {}, 628 | "source": [ 629 | "**Question**: Why is `avg_order_value` all `null`?" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 23, 635 | "id": "c2c1bebe-9999-444c-84e4-b1efcb4ae370", 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "# your answer\n", 640 | "answer = None" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "id": "276b7e0c-d854-48f4-afc6-e57c7e57684c", 646 | "metadata": {}, 647 | "source": [ 648 | "# Data quality and code testing" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "id": "bf8edcd6-58b6-4606-9eeb-896aaa82f4d7", 654 | "metadata": {}, 655 | "source": [ 656 | "## Data quality implementation\n", 657 | "\n", 658 | "As part of our requirements, we saw that the output dataset needs to have \n", 659 | "1. 
Unique and non-null `customer_key`\n", 660 | "2. Variance in `avg_*` columns between runs should not be more than 5% (across all customers)\n", 661 | "\n", 662 | "While the first test is a simple check, the second one requires that we use the data from previous runs and compare it with the current run's data or store the sum(avg_*) of each run. Let's store the run-level metrics in a run_metadata table (in sqlite3).\n", 663 | "\n", 664 | "Our pipelines should run data quality checks before making the data available to end users. This ensures that you can catch any issues before they can cause damage.\n", 665 | "\n", 666 | "![WAP pattern](./assets/images/wap.png)\n", 667 | "\n", 668 | "### **Recommended reading**: **[Types of data quality checks](https://www.startdataengineering.com/post/types-of-dq-checks/)**, **[Implementing data quality checks with Great Expectations](https://www.startdataengineering.com/post/implement_data_quality_with_great_expectations/)**, & **[Write-Audit-Publish pattern](https://www.startdataengineering.com/post/de_best_practices/#32-ensure-data-is-valid-before-exposing-it-to-its-consumers-aka-data-quality-checks)**\n", 669 | "\n", 670 | "Let's see how we can implement DQ checks in Python" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 25, 676 | "id": "d0f878b9-8826-41a9-a271-50f2a540ed2d", 677 | "metadata": {}, 678 | "outputs": [], 679 | "source": [ 680 | "import json\n", 681 | "\n", 682 | "# get current run's metrics\n", 683 | "curr_metrics = json.loads(\n", 684 | " customer_outreach_metrics\\\n", 685 | " .select(\n", 686 | " pl.col(\"avg_num_items_per_order\").alias(\"sum_avg_num_items_per_order\"),\n", 687 | " pl.col(\"avg_order_value\").alias(\"sum_avg_order_value\")\n", 688 | " )\\\n", 689 | " .sum()\\\n", 690 | " .write_json())[0]\n", 691 | "\n" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 26, 697 | "id": "2095ed50-2afc-4860-bf8a-2e529ea8de0a", 698 | "metadata": {}, 
699 | "outputs": [], 700 | "source": [ 701 | "# Store run metadata in a table\n", 702 | "import sqlite3\n", 703 | "\n", 704 | "# Connect to SQLite database\n", 705 | "conn = sqlite3.connect('metadata.db')\n", 706 | "\n", 707 | "# Create a cursor object\n", 708 | "cursor = conn.cursor()\n", 709 | "\n", 710 | "# Insert data into the run_metadata table\n", 711 | "cursor.execute('''\n", 712 | " INSERT INTO run_metadata (run_id, metadata)\n", 713 | " VALUES (?, ?)\n", 714 | "''', ('2024-09-15-10-00', json.dumps(curr_metrics)))\n", 715 | "\n", 716 | "# Commit the changes and close the connection\n", 717 | "conn.commit()\n", 718 | "conn.close()" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 27, 724 | "id": "60034e3f-e014-4cf8-b938-2cf5d8564219", 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "# Assume that another run of the data pipeline has been completed\n", 729 | "\n", 730 | "# Get the most recent data from the run_metadata table\n", 731 | "import sqlite3\n", 732 | "\n", 733 | "# Connect to SQLite database\n", 734 | "conn = sqlite3.connect('metadata.db')\n", 735 | "\n", 736 | "# Create a cursor object\n", 737 | "cursor = conn.cursor()\n", 738 | "\n", 739 | "# Fetch the most recent row based on run_id\n", 740 | "cursor.execute('''\n", 741 | " SELECT * FROM run_metadata\n", 742 | " ORDER BY run_id DESC\n", 743 | " LIMIT 1\n", 744 | "''')\n", 745 | "\n", 746 | "# Get the result\n", 747 | "most_recent_row = cursor.fetchone()\n", 748 | "\n", 749 | "# Close the connection\n", 750 | "conn.close()" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": 28, 756 | "id": "1f6a003c-bec7-42b6-88fb-7a58aa6c9c49", 757 | "metadata": {}, 758 | "outputs": [], 759 | "source": [ 760 | "# get the most recent metric\n", 761 | "prev_metric = json.loads(most_recent_row[1])\n", 762 | "\n", 763 | "# get current metric\n", 764 | "# This assumes the pipeline is rerun\n", 765 | "curr_metric = json.loads(\n", 766 | " 
customer_outreach_metrics\\\n", 767 | " .select(\n", 768 | " pl.col(\"avg_num_items_per_order\").alias(\"sum_avg_num_items_per_order\"),\n", 769 | " pl.col(\"avg_order_value\").cast(int).alias(\"sum_avg_order_value\")\n", 770 | " )\\\n", 771 | " .sum()\\\n", 772 | " .write_json())[0]\n", 773 | "\n", 774 | "# Compare with current data for variance percentage\n", 775 | "def percentage_difference(val1, val2):\n", 776 | " if val1 == 0 and val2 == 0:\n", 777 | " return 0.0\n", 778 | " elif val1 == 0 or val2 == 0:\n", 779 | " return 100.0\n", 780 | " return abs((val1 - val2) / ((val1 + val2) / 2)) * 100\n", 781 | "\n", 782 | "prev_metric['sum_avg_order_value'] = int(float(prev_metric['sum_avg_order_value']))\n", 783 | "\n", 784 | "comparison = {}\n", 785 | "for key in curr_metric:\n", 786 | " if key in prev_metric:\n", 787 | " comparison[key] = percentage_difference(curr_metric[key], prev_metric[key])\n", 788 | "\n", 789 | "if prev_metric is None:\n", 790 | " print('No prev metric')\n", 791 | " \n", 792 | "# code to check if variance < 5\n", 793 | "for k, v in comparison.items():\n", 794 | " if v >= 5:\n", 795 | " raise Exception(f\"Difference for {k} is greater than 5%: {v}%\")" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 29, 801 | "id": "b1d7790a-ee75-4c42-9e08-1bb90b6a0221", 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "# Insert current run data into the run_metadata table\n", 806 | "# Store run metadata in a table\n", 807 | "import sqlite3\n", 808 | "from datetime import datetime\n", 809 | "\n", 810 | "# Get current timestamp and format it\n", 811 | "current_timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M')\n", 812 | "\n", 813 | "# Connect to SQLite database\n", 814 | "conn = sqlite3.connect('metadata.db')\n", 815 | "\n", 816 | "# Create a cursor object\n", 817 | "cursor = conn.cursor()\n", 818 | "comparison_json = json.dumps(comparison)\n", 819 | "\n", 820 | "# Insert data into the run_metadata table\n", 821 
| "cursor.execute('''\n", 822 | " INSERT INTO run_metadata (run_id, metadata)\n", 823 | " VALUES (?, ?)\n", 824 | "''', (current_timestamp, comparison_json))\n", 825 | "\n", 826 | "# Commit the changes and close the connection\n", 827 | "conn.commit()\n", 828 | "conn.close()" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "id": "2f0c070b-325a-4012-b141-43078d22d843", 834 | "metadata": {}, 835 | "source": [ 836 | "**Exercise:** There is a deliberate mistake in the above cell, what is it?\n", 837 | "\n", 838 | "**Hint** Is the code actually doing what it is supposed to do?" 839 | ] 840 | }, 841 | { 842 | "cell_type": "markdown", 843 | "id": "af97ecfd-8d29-4ead-bd7b-c0cb080a4854", 844 | "metadata": {}, 845 | "source": [ 846 | "Your answer here" 847 | ] 848 | }, 849 | { 850 | "cell_type": "markdown", 851 | "id": "944af0b7-1485-4f14-9662-4dba6a2a4d42", 852 | "metadata": {}, 853 | "source": [ 854 | "# Code organization\n", 855 | "\n", 856 | "Deciding how to organize your code can be overwhelming. Typically, companies use one of the following options to organize code:\n", 857 | "\n", 858 | "1. Based on multi-hop architecture. E.g. **[see this dbt folder structure](https://github.com/dbt-labs/jaffle_shop_duckdb/tree/duckdb/models)**\n", 859 | "2. Based on existing company standards.\n", 860 | "\n", 861 | "### Folder structure\n", 862 | "\n", 863 | "We can use the following folder structure for our use case (and most real-life projects).\n", 864 | "\n", 865 | "![Folder structure](./assets/images/folder.png)\n", 866 | "\n", 867 | "Each file under the etl folder will have the code necessary to generate that dataset. 
The above folder structure enables anyone new to the project to quickly understand where the code to create a certain dataset will be.\n", 868 | "\n", 869 | "Now compare this with dbt's recommended project structure:\n", 870 | "\n", 871 | "![dbt folder structure](./assets/images/dbtps.png)\n", 872 | "\n", 873 | "*Note:* dbt recommends using its `semantic layer` over a pre-aggregated layer. The `semantic layer` performs aggregation on (approximately) every query, which can lead to skyrocketing costs.\n", 874 | "\n", 875 | "### Code modularity\n", 876 | "\n", 877 | "We have the code to create the necessary tables; now, we have to put them into functions that are easy to use and maintain. \n", 878 | "\n", 879 | "#### **Recommended reading**: **[How to write modular python code](https://www.startdataengineering.com/post/code-patterns/#1-functional-design)**\n", 880 | "\n", 881 | "We will define the function `create_dataset` for each table in the Python script for our use case. Having a commonly named function will enable us to:\n", 882 | "\n", 883 | "1. Use consistent naming. For example: `dim_customer.create_dataset`, `customer_outreach_metrics.create_dataset`\n", 884 | "2. Pull out code commonalities into a base class. 
Moving code into a common base class will be covered in a future post.\n", 885 | "\n", 886 | "Let's see what functions we would want to include in the `de_project/etl/gold/pre-aggregated/customer_outreach_metrics.py`.\n", 887 | "\n", 888 | "**Note** We have moved code that involves reading/writing to metadata into **[de_project/utils/metadata.py](https://github.com/josephmachado/de_project/blob/main/de_project/utils/metadata.py)**.\n" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": 30, 894 | "id": "66f58b52-dfcd-417f-add7-fbece129206b", 895 | "metadata": {}, 896 | "outputs": [], 897 | "source": [ 898 | "# de_project/utils/metadata.py\n", 899 | "import json\n", 900 | "\n", 901 | "import sqlite3\n", 902 | "from datetime import datetime\n", 903 | "\n", 904 | "def get_latest_run_metrics():\n", 905 | " # Connect to SQLite database\n", 906 | " conn = sqlite3.connect(\"metadata.db\")\n", 907 | "\n", 908 | " # Create a cursor object\n", 909 | " cursor = conn.cursor()\n", 910 | "\n", 911 | " # Fetch the most recent row based on run_id\n", 912 | " cursor.execute(\n", 913 | " \"\"\"\n", 914 | " SELECT * FROM run_metadata\n", 915 | " ORDER BY run_id DESC\n", 916 | " LIMIT 1\n", 917 | " \"\"\"\n", 918 | " )\n", 919 | "\n", 920 | " # Get the result\n", 921 | " most_recent_row = cursor.fetchone()\n", 922 | "\n", 923 | " # Close the connection\n", 924 | " conn.close()\n", 925 | " return (\n", 926 | " json.loads(most_recent_row[1])\n", 927 | " if most_recent_row and len(most_recent_row) > 0\n", 928 | " else None\n", 929 | " )\n", 930 | "\n", 931 | "\n", 932 | "def insert_run_metrics(curr_metrics):\n", 933 | " # Connect to SQLite database\n", 934 | " conn = sqlite3.connect(\"metadata.db\")\n", 935 | "\n", 936 | " # Create a cursor object\n", 937 | " cursor = conn.cursor()\n", 938 | " curr_metrics_json = json.dumps(curr_metrics)\n", 939 | "\n", 940 | " current_timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M')\n", 941 | " # Insert data into the 
run_metadata table\n", 942 | " cursor.execute(\n", 943 | " \"\"\"\n", 944 | " INSERT INTO run_metadata (run_id, metadata)\n", 945 | " VALUES (?, ?)\n", 946 | " \"\"\",\n", 947 | " (current_timestamp, curr_metrics_json),\n", 948 | " )\n", 949 | "\n", 950 | " # Commit the changes and close the connection\n", 951 | " conn.commit()\n", 952 | " conn.close()" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": 31, 958 | "id": "a106863d-24ed-42fe-bece-0755c005e2cb", 959 | "metadata": {}, 960 | "outputs": [], 961 | "source": [ 962 | "# de_project/etl/gold/pre-aggregated/customer_outreach_metrics.py\n", 963 | "import json\n", 964 | "import polars as pl\n", 965 | "\n", 966 | "def create_dataset(wide_lineitem, wide_orders):\n", 967 | " order_lineitem_metrics = wide_lineitem.group_by(pl.col(\"order_key\")).agg(\n", 968 | " pl.col(\"linenumber\").count().alias(\"num_lineitems\")\n", 969 | " )\n", 970 | " return (\n", 971 | " wide_orders.join(order_lineitem_metrics, on=\"order_key\", how=\"left\")\n", 972 | " .group_by(pl.col(\"customer_key\"), pl.col(\"name\").alias(\"customer_name\"))\n", 973 | " .agg(\n", 974 | " pl.min(\"totalprice\").alias(\"min_order_value\"),\n", 975 | " pl.max(\"totalprice\").alias(\"max_order_value\"),\n", 976 | " pl.mean(\"totalprice\").alias(\"avg_order_value\"),\n", 977 | " pl.mean(\"num_lineitems\").alias(\"avg_num_items_per_order\"),\n", 978 | " )\n", 979 | " )\n", 980 | "\n", 981 | "\n", 982 | "def percentage_difference(val1, val2):\n", 983 | " if val1 == 0 and val2 == 0:\n", 984 | " return 0.0\n", 985 | " elif val1 == 0 or val2 == 0:\n", 986 | " return 100.0\n", 987 | " return abs((val1 - val2) / ((val1 + val2) / 2)) * 100\n", 988 | "\n", 989 | "\n", 990 | "def check_no_duplicates(customer_outreach_metrics_df):\n", 991 | " # check uniqueness\n", 992 | " if (\n", 993 | " customer_outreach_metrics_df.filter(\n", 994 | " customer_outreach_metrics_df.select(pl.col(\"customer_key\")).is_duplicated()\n", 995 | " ).shape[0]\n", 
996 | " > 0\n", 997 | " ):\n", 998 | " raise Exception(\"Duplicate customer_keys\")\n", 999 | "\n", 1000 | "\n", 1001 | "def check_variance(customer_outreach_metrics_df, perc_threshold=5):\n", 1002 | " prev_metric = get_latest_run_metrics()\n", 1003 | " if prev_metric is None:\n", 1004 | " return\n", 1005 | " prev_metric['sum_avg_order_value'] = int(float(prev_metric['sum_avg_order_value']))\n", 1006 | " curr_metric = json.loads(\n", 1007 | " customer_outreach_metrics_df.select(\n", 1008 | " pl.col(\"avg_num_items_per_order\").alias(\"sum_avg_num_items_per_order\"),\n", 1009 | " pl.col(\"avg_order_value\").cast(int).alias(\"sum_avg_order_value\"),\n", 1010 | " )\n", 1011 | " .sum()\n", 1012 | " .write_json()\n", 1013 | " )[0]\n", 1014 | " comparison = {}\n", 1015 | " for key in curr_metric:\n", 1016 | " if key in prev_metric:\n", 1017 | " comparison[key] = percentage_difference(curr_metric[key], prev_metric[key])\n", 1018 | "\n", 1019 | " for k, v in comparison.items():\n", 1020 | " if v >= perc_threshold:\n", 1021 | " raise Exception(f\"Difference for {k} is greater than 5%: {v}%\")\n", 1022 | "\n", 1023 | "\n", 1024 | "def validate_dataset(customer_outreach_metrics_df):\n", 1025 | " # data quality checks\n", 1026 | " check_no_duplicates(customer_outreach_metrics_df)\n", 1027 | " check_variance(customer_outreach_metrics_df)" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "markdown", 1032 | "id": "603bde44-87c1-42a4-96ab-3fc88a613221", 1033 | "metadata": {}, 1034 | "source": [ 1035 | "Notice how we keep the functions performing one task and how the function name is `verb_noun`." 
1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "markdown", 1040 | "id": "b29b6338-fd2c-4e5b-be75-39f7d8a0d510", 1041 | "metadata": {}, 1042 | "source": [ 1043 | "**Exercise**: How would you improve the `check_variance` function?\n" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "id": "e81df2bf-61b5-4bd6-9017-bc021b03660a", 1049 | "metadata": {}, 1050 | "source": [ 1051 | "Your answer here" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "markdown", 1056 | "id": "5fd37db0-7496-4dc0-b03e-7994af55564d", 1057 | "metadata": {}, 1058 | "source": [ 1059 | "## Code testing\n", 1060 | "\n", 1061 | "We will use `pytest` to test our code. Let's write a test case to test the `create_dataset` function for the `dim_customer` dataset. The test code is at **[de_project/tests/unit/test_dim_customer.py](https://github.com/josephmachado/de_project/blob/main/de_project/tests/unit/test_dim_customer.py)**.\n", 1062 | "\n", 1063 | "### **Recommended reading**: **[How to use pytest to test your code](https://www.startdataengineering.com/post/code-patterns/#4-testing-with-pytest)**\n", 1064 | "\n", 1065 | "We can run the tests via the terminal using:\n" 1066 | ] 1067 | }, 1068 | { 1069 | "cell_type": "code", 1070 | "execution_count": 32, 1071 | "id": "52e33dae-f49b-4e4b-be52-d4e56319bb54", 1072 | "metadata": {}, 1073 | "outputs": [ 1074 | { 1075 | "name": "stdout", 1076 | "output_type": "stream", 1077 | "text": [ 1078 | "\u001b[1m============================= test session starts ==============================\u001b[0m\n", 1079 | "platform linux -- Python 3.12.4, pytest-8.3.3, pluggy-1.5.0\n", 1080 | "rootdir: /home/josephkevinmachado/code/de_project\n", 1081 | "plugins: anyio-4.4.0\n", 1082 | "collected 1 item \u001b[0m\n", 1083 | "\n", 1084 | "de_project/tests/unit/test_dim_customer.py \u001b[32m.\u001b[0m\u001b[32m [100%]\u001b[0m\n", 1085 | "\n", 1086 | "\u001b[32m============================== \u001b[32m\u001b[1m1 passed\u001b[0m\u001b[32m in 0.12s\u001b[0m\u001b[32m 
===============================\u001b[0m\n" 1087 | ] 1088 | } 1089 | ], 1090 | "source": [ 1091 | "! python -m pytest de_project/tests/unit/test_dim_customer.py" 1092 | ] 1093 | }, 1094 | { 1095 | "cell_type": "markdown", 1096 | "id": "b2321849-0a2d-463d-9bf7-59a68dec8d0e", 1097 | "metadata": {}, 1098 | "source": [ 1099 | "We will add the below code to our unit test case and run pytest." 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "markdown", 1104 | "id": "0b3beae4-cdd5-4ccd-b12d-5cdb323ced1c", 1105 | "metadata": {}, 1106 | "source": [ 1107 | "## Orchestration and scheduling\n", 1108 | "\n", 1109 | "We will run `Apache Airflow` to schedule and orchestrate our pipeline. We will set the schedule to be every 1 min (so that we can check them quickly) and we only have to run **[this function](https://github.com/josephmachado/de_project/blob/b9287d8e3a78f91626da71e8ed886875095f59dc/de_project/run_pipeline.py#L7)** which runs the ETL and outputs the `customer_outreach_metrics` data.\n", 1110 | "\n", 1111 | "### **Recommended reading**: **[Why use Airflow](https://www.startdataengineering.com/post/why-to-use-orchestrators/)** & **[Docker for data engineers](https://www.startdataengineering.com/post/docker-for-de/)**." 1112 | ] 1113 | }, 1114 | { 1115 | "cell_type": "code", 1116 | "execution_count": 33, 1117 | "id": "c38b92a3-801e-4505-967c-f97c418100b7", 1118 | "metadata": {}, 1119 | "outputs": [ 1120 | { 1121 | "name": "stdout", 1122 | "output_type": "stream", 1123 | "text": [ 1124 | "Cleaning up tpch and metadata db files\n", 1125 | "Creating TPCH input data\n", 1126 | "Creating metadata table\n" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "! 
python ./setup/create_input_data.py\n", 1132 | "# recreate data to ensure data from above sections are cleaned out" 1133 | ] 1134 | }, 1135 | { 1136 | "cell_type": "code", 1137 | "execution_count": null, 1138 | "id": "1778ebe7-f7f8-47d7-b086-8955b3c9b6a2", 1139 | "metadata": {}, 1140 | "outputs": [], 1141 | "source": [ 1142 | "# Run this in your terminal if the docker compose up command fails for you\n", 1143 | "#! sudo mkdir -p logs plugins temp dags tests migrations data visualization de_project && sudo chmod -R u=rwx,g=rwx,o=rwx logs plugins temp dags tests migrations data visualization de_project tpch.db metadata.db" 1144 | ] 1145 | }, 1146 | { 1147 | "cell_type": "markdown", 1148 | "id": "8de159e4-95e6-44bd-8957-e231cf713ab0", 1149 | "metadata": {}, 1150 | "source": [ 1151 | "When running the command above on VSCode, use its terminal.\n", 1152 | "\n", 1153 | "Here's how you can run this via terminal (when using Jupyter notebook):\n", 1154 | "\n", 1155 | "