├── .devcontainer └── devcontainer.json ├── .gitattributes ├── .github └── workflows │ ├── project_setup.yml │ ├── run_dbt.yml │ ├── run_dbt_cleanup.yml │ ├── run_dbt_force.yml │ ├── run_dbt_on_cron.yml │ ├── run_dbt_on_pr.yml │ └── run_incremental_dbt_on_merge.yml ├── .gitignore ├── README.md ├── b64.py ├── get_manifest.sh ├── print_json.py ├── project_goes_here ├── .python-version ├── .user.yml ├── LICENSE ├── README.md ├── dbt_project.yml ├── etc │ ├── dbdiagram_definition.txt │ └── jaffle_shop_erd.png ├── macros │ └── drop_orphanate_tables.sql ├── models │ ├── customers.sql │ ├── docs.md │ ├── orders.sql │ ├── overview.md │ ├── schema.yml │ └── staging │ │ ├── schema.yml │ │ ├── stg_customers.sql │ │ ├── stg_orders.sql │ │ └── stg_payments.sql ├── packages.yml ├── profiles.yml └── seeds │ ├── .gitkeep │ ├── raw_customers.csv │ ├── raw_orders.csv │ └── raw_payments.csv ├── requirements.txt ├── save_and_publish_docs.sh └── update_profile_with_prod.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "features": { 4 | "ghcr.io/devcontainers/features/python:1": {} 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/workflows/project_setup.yml: -------------------------------------------------------------------------------- 1 | name: Project Setup 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | logLevel: 7 | description: 'Log level' 8 | required: true 9 | default: 'warning' 10 | type: choice 11 | options: 12 | - info 13 | - warning 14 | - debug 15 | DatawarehouseType: 16 | type: choice 17 | required: true 18 | description: DWH type 19 | options: 20 | - postgres 21 | - snowflake 22 | - redshift 23 | - bigquery 24 | tags: 25 | description: 'Set up the project to run on GH Actions' 26 | required: false 27 | type: boolean 28 | 29 | jobs: 30 | project_setup: 31 | runs-on: ubuntu-latest 32 | 33 | steps: 34 | - uses: "actions/checkout@v3" 35 | - uses: "actions/setup-python@v2" 36 | with: 37 | python-version: "3.9" 38 | 39 | - name: install requirements 40 | run: pip install -q -r requirements.txt 41 | 42 | - name: Add prod target to profiles.yml 43 | run: "python update_profile_with_prod.py ${{ github.event.inputs.DatawarehouseType }}" 44 | 45 | - name: Create PR with updated profiles.yml file 46 | uses: peter-evans/create-pull-request@v5 47 | with: 48 | title: "Add prod target to profiles.yml" 49 | commit-message: "Add prod target to profiles.yml" 50 | branch: "add-prod-target" 51 | base: "main" 52 | token: ${{ secrets.WORKFLOW_TOKEN }} 53 | body: | 54 | This pull request adds a new target called "prod" to the `profiles.yml` file 55 | -------------------------------------------------------------------------------- /.github/workflows/run_dbt.yml: -------------------------------------------------------------------------------- 1 | name: Manual dbt build - prod 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | logLevel: 7 | description: 'Log level' 8 | required: true 9 | default: 'warning' 10 | type: choice 11 | options: 12 | - info 13 | - warning 14 | - debug 15 | tags: 16 | description: 'Manually trigger a dbt run' 17 | required: false 18 | type: boolean 
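# note: the PROJECT_NAME and DATASET env vars set on the job below are read
# by profiles.yml via env_var(), so both must exist as repository Actions
# secrets before this workflow can succeed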
19 | 20 | jobs: 21 | run_dbt: 22 | runs-on: ubuntu-latest 23 | env: 24 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 25 | DATASET: ${{ secrets.DATASET }} 26 | steps: 27 | - uses: "actions/checkout@main" 28 | 29 | - name: Create file 30 | run: touch google-key.json 31 | 32 | - name: Write service account key 33 | env: 34 | DATA: ${{ secrets.GOOGLE_SERVICES_JSON }} 35 | run: echo "$DATA" > google-key.json 36 | 37 | - name: install requirements 38 | run: pip install -q -r requirements.txt 39 | 40 | - name: dbt deps 41 | run: | 42 | cd project_goes_here 43 | dbt deps 44 | 45 | - name: dbt build 46 | run: | 47 | cd project_goes_here 48 | dbt build --target prod 49 | dbt snapshot --target prod 50 | 51 | -------------------------------------------------------------------------------- /.github/workflows/run_dbt_cleanup.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled cleanup job - drop non-prod schemas 2 | 3 | on: 4 | schedule: 5 | - cron: "0 12 * * *" 6 | jobs: 7 | dbt_scheduled_cleanup_run: 8 | runs-on: ubuntu-latest 9 | env: 10 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 11 | DATASET: ${{ secrets.DATASET }} 12 | steps: 13 | - uses: "actions/checkout@main" 14 | 15 | - name: install requirements 16 | run: pip install -q -r requirements.txt 17 | 18 | - name: dbt deps 19 | run: | 20 | cd project_goes_here 21 | dbt deps 22 | 23 | - name: dbt run operation 24 | run: | 25 | cd project_goes_here 26 | dbt run-operation drop_old_relations --args '{dry_run: "true"}' 27 | -------------------------------------------------------------------------------- /.github/workflows/run_dbt_force.yml: -------------------------------------------------------------------------------- 1 | name: Manual dbt run force - prod 2 | 3 | # sometimes you need to get a prod run through without running your tests 4 | # use sparingly and at your own risk 5 | 6 | on: 7 | workflow_dispatch: 8 | inputs: 9 | logLevel: 10 | description: 'Log level' 11 | required: true 12 | default: 'warning' 13 | type: choice 14 | options: 15 | - info 16 | - warning 17 | - debug 18 | tags: 19 | description: 'Manually trigger a prod dbt run' 20 | required: false 21 | type: boolean 22 | 23 | jobs: 24 | run_dbt: 25 | runs-on: ubuntu-latest 26 | env: 27 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 28 | DATASET: ${{ secrets.DATASET }} 29 | steps: 30 | - uses: "actions/checkout@main" 31 | 32 | - name: Write service account key 33 | env: 34 | DATA: ${{ secrets.GOOGLE_SERVICES_JSON }} 35 | run: echo "$DATA" > google-key.json 36 | 37 | - name: install requirements 38 | run: pip install -q -r requirements.txt 39 | 40 | - name: dbt deps 41 | run: | 42 | cd project_goes_here 43 | dbt deps 44 | 45 | - name: dbt build 46 | run: | 47 | cd project_goes_here 48 | dbt run --target prod 49 | 50 | -------------------------------------------------------------------------------- /.github/workflows/run_dbt_on_cron.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled dbt run - prod 2 | 3 | #on: 4 | # schedule: 5 | # - cron: "10 10 * * *" 6 | 7 | 8 | on: 9 | workflow_dispatch: 10 | inputs: 11 | logLevel: 12 | description: 'Log level' 13 | required: true 14 | default: 'warning' 15 | type: choice 16 | options: 17 | - info 18 | - warning 19 | - debug 20 | tags: 21 | description: 'Manually trigger a dbt run' 22 | required: false 23 | type: boolean 24 | jobs: 25 | dbt_scheduled_run: 26 | runs-on: ubuntu-latest 27 | env: 28 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 29 | DATASET: ${{ secrets.DATASET }} 30 | steps: 31 | - uses: "actions/checkout@main" 32 | 33 | - name: Create file 34 | run: touch google-key.json 35 | 36 | - name: Write service account 
key 37 | env: 38 | DATA: ${{ secrets.GOOGLE_SERVICES_JSON }} 39 | run: echo "$DATA" > google-key.json 40 | 41 | - name: install requirements 42 | run: pip install -q -r requirements.txt 43 | 44 | - name: dbt deps 45 | run: | 46 | cd project_goes_here 47 | dbt deps 48 | 49 | - name: Get manifest.json from gh-pages branch 50 | run: | 51 | ./get_manifest.sh 52 | 53 | - name: dbt build 54 | run: | 55 | cd project_goes_here 56 | if [ -f "prev_run_state/manifest.json" ]; then 57 | dbt build --full-refresh --fail-fast --select state:modified+ --state prev_run_state 58 | dbt docs generate 59 | else 60 | dbt build 61 | dbt docs generate 62 | fi 63 | 64 | - name: Push new documentation to GH Pages 65 | run: | 66 | ./save_and_publish_docs.sh 67 | -------------------------------------------------------------------------------- /.github/workflows/run_dbt_on_pr.yml: -------------------------------------------------------------------------------- 1 | name: Run dbt on PR to main 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches-ignore: 9 | - main 10 | 11 | jobs: 12 | dbt_run_on_pr: 13 | runs-on: ubuntu-latest 14 | env: 15 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 16 | DATASET: ${{ secrets.DATASET }} 17 | steps: 18 | - uses: "actions/checkout@main" 19 | 20 | - name: install requirements 21 | run: pip install -q -r requirements.txt 22 | 23 | - name: Create file 24 | run: touch google-key.json 25 | 26 | - name: Write service account key 27 | env: 28 | DATA: ${{ secrets.GOOGLE_SERVICES_JSON }} 29 | run: echo "$DATA" > google-key.json 30 | 31 | - name: dbt deps 32 | run: | 33 | cd project_goes_here 34 | dbt deps 35 | 36 | - name: Get manifest.json from gh-pages branch 37 | run: | 38 | ./get_manifest.sh 39 | 40 | # thanks for this, Datafold! 41 | - name: Find Current Pull Request 42 | uses: jwalton/gh-find-current-pr@v1.3.0 43 | id: findPR 44 | 45 | # do we need to point to a specific profile? 
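# the SCHEMA env var on the next step builds each PR into its own schema
# (e.g. PR_NUM_42), which is what the scheduled cleanup job's
# drop_old_relations macro later drops; note this only takes effect if the
# dev target in profiles.yml reads '{{ env_var("SCHEMA") }}', as the targets
# generated by the Project Setup workflow do for postgres/snowflake/redshift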
46 | - name: dbt build 47 | run: | 48 | cd project_goes_here 49 | dbt build --target dev --select state:modified+ --state prev_run_state 50 | env: 51 | SCHEMA: "${{ format('{0}_{1}', 'PR_NUM', steps.findPr.outputs.pr) }}" 52 | 53 | 54 | -------------------------------------------------------------------------------- /.github/workflows/run_incremental_dbt_on_merge.yml: -------------------------------------------------------------------------------- 1 | name: Incremental dbt run on merge to main 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | # leaving this for testing 7 | workflow_dispatch: 8 | inputs: 9 | logLevel: 10 | description: 'Log level' 11 | required: true 12 | default: 'warning' 13 | type: choice 14 | options: 15 | - info 16 | - warning 17 | - debug 18 | tags: 19 | description: 'Manually trigger a dbt run' 20 | required: false 21 | type: boolean 22 | 23 | jobs: 24 | dbt_run_on_merge_incremental: 25 | runs-on: ubuntu-latest 26 | env: 27 | PROJECT_NAME: ${{ secrets.PROJECT_NAME }} 28 | DATASET: ${{ secrets.DATASET }} 29 | steps: 30 | - uses: "actions/checkout@v4" 31 | with: 32 | ref: gh-pages 33 | - uses: "actions/checkout@v4" 34 | 35 | - name: Write service account key 36 | env: 37 | DATA: ${{ secrets.GOOGLE_SERVICES_JSON }} 38 | run: echo "$DATA" > google-key.json 39 | 40 | - name: install requirements 41 | run: pip install -q -r requirements.txt 42 | 43 | - name: dbt deps 44 | run: | 45 | cd project_goes_here 46 | dbt deps 47 | 48 | - name: Get manifest.json from gh-pages branch 49 | run: | 50 | ./get_manifest.sh 51 | 52 | - name: dbt build 53 | id: build 54 | if: ${{ !cancelled() }} 55 | run: | 56 | cd project_goes_here 57 | if [ -f "prev_run_state/manifest.json" ]; then 58 | dbt build --full-refresh --fail-fast --select state:modified+ --state prev_run_state 59 | dbt docs generate 60 | else 61 | dbt build 62 | dbt docs generate 63 | fi 64 | 65 | - name: Push new documentation to GH Pages 66 | if: ${{ (success() || failure()) && steps.build.conclusion == 'success' }} 67 | run: | 68 | ./save_and_publish_docs.sh 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | project_goes_here/logs 2 | project_goes_here/target 3 | google-key.json 4 | project_goes_here/dbt_packages -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple dbt Runners 2 | 3 | dbt Cloud: who needs it? With this repo, not you! 4 | 5 | This repo gives you the ability to run dbt in production using GitHub Actions. There are several basic GH Action workflows you can take and modify for your needs: 6 | 7 | - run dbt commands on a [schedule](https://github.com/C00ldudeNoonan/simple-dbt-runner/blob/main/.github/workflows/run_dbt_on_cron.yml) 8 | - run dbt after merging a PR into the main branch (we recommend only choosing one of these at a time) 9 | - [full run](https://github.com/C00ldudeNoonan/simple-dbt-runner/blob/main/.github/workflows/run_dbt_on_merge.yml) 10 | - [state-aware run](https://github.com/C00ldudeNoonan/simple-dbt-runner/blob/main/.github/workflows/run_incremental_dbt_on_merge.yml) (only modified models) 11 | - dbt CI runs on PR commits to make sure your changes will work 12 | 13 | The state-aware workflow will look for the `manifest.json` file in a branch called `gh-pages`. We also take advantage of that branch to host your project's documentation website. 14 | 15 | ## How To Set Up Your dbt Project 16 | 17 | 1. 
Fork this repo and copy your whole dbt project into the `project_goes_here` folder. 18 | 2. Create a [Personal Access Token](https://github.com/settings/tokens?type=beta) with Workflows (Read/Write) permission and add it to the repository's Actions secrets under the key `WORKFLOW_TOKEN`. 19 | 3. Update your repository settings to allow GitHub Actions to create PRs. This setting can be found in a repository's settings under Actions > General > Workflow permissions. [This is what it should look like.](https://user-images.githubusercontent.com/21294829/263915123-512bf335-6796-4ae3-a7dc-ad1cf6c4035f.png) 20 | 4. Go to the Actions tab and run the `Project Setup` workflow, making sure to select the type of database you want to set up. 21 | - This opens a PR with our suggested changes to your `profiles.yml` and `requirements.txt` files. 22 | - We assume that if you're migrating to self-hosting you need to add a prod target to your `profiles.yml` file, so this action will do that for you and also add the database driver you indicate. 23 | - FYI, we also assume you have a `profiles.yml` file. 24 | 5. Add some environment variables to your GitHub Actions secrets in the Settings tab. You can see which vars are needed by looking for anything prefixed with `${{ secrets.` in the open PR. You might need to slightly edit this PR based on your project setup. 25 | 6. Run the `Manual dbt build - prod` workflow to test that you're good to go. 26 | 7. Edit the Actions you want to keep and delete the ones you don't. 27 | 28 | # dbt Documentation 29 | 30 | dbt documentation is pushed to GitHub Pages. If you are using GitHub Enterprise, the pages are automatically secured behind GitHub SSO. Hosting your dbt docs is highly contextual to your organization; there are proven patterns for shipping dbt docs to Netlify, Confluence, and many other targets. 31 | 32 | If using GH Pages, the only manual configuration required for hosting your dbt docs is to set it to run off the root directory of the `gh-pages` branch. You can configure this in your GitHub repo's Settings > Pages. Once you set that up, it will look like [this](https://c00ldudenoonan.github.io/simple-dbt-runner/#!/overview). 33 | 34 | **WARNING**: if you do not have GitHub Enterprise and you set up documentation hosting, your page might be publicly accessible. Please review [their docs](https://pages.github.com/). 35 | 36 | # Acknowledgements & Notes 37 | 38 | Thank you to [dwreeves](https://github.com/dwreeves) both for highlighting an issue with the initial deployment with regard to public S3 buckets and for providing the [template for deploying to GitHub Pages](https://github.com/dwreeves/dbt_docs_ghpages_example). 
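One more note: if you prefer a terminal to the Settings tab for step 5, repository secrets can also be set with the [GitHub CLI](https://cli.github.com/). A sketch, assuming the BigQuery secrets this repo's workflows reference (the values shown are hypothetical; substitute whichever secret names your open PR references):

```bash
# set the plain-text secrets referenced as ${{ secrets.* }} in the workflows
gh secret set PROJECT_NAME --body "my-gcp-project"   # hypothetical project id
gh secret set DATASET --body "analytics"             # hypothetical dataset name

# the service-account key is easiest to load straight from its JSON file
gh secret set GOOGLE_SERVICES_JSON < service-account.json
```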
39 | -------------------------------------------------------------------------------- /b64.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import sys 3 | 4 | # encode the file given on the command line (e.g. a service-account key) 5 | # so it can be pasted into a GitHub Actions secret 6 | with open(sys.argv[1], 'r') as f: 7 | sample_string = f.read() 8 | sample_string_bytes = sample_string.encode("ascii") 9 | 10 | base64_bytes = base64.b64encode(sample_string_bytes) 11 | base64_string = base64_bytes.decode("ascii") 12 | 13 | print(f"Encoded string: {base64_string}") -------------------------------------------------------------------------------- /get_manifest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | git fetch 5 | 6 | # try to get the manifest from the gh-pages branch; if the branch or file 7 | # doesn't exist yet (e.g. on the very first run), exit quietly so the 8 | # workflows fall back to a full build 9 | if ! git checkout origin/gh-pages manifest.json; then 10 | echo "No previous manifest found; skipping state comparison" 11 | exit 0 12 | fi 13 | 14 | # create the prev_run_state directory if it doesn't exist 15 | mkdir -p project_goes_here/prev_run_state 16 | 17 | # copy the manifest.json to the target directory 18 | cp manifest.json project_goes_here/prev_run_state/manifest.json 19 | -------------------------------------------------------------------------------- /print_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | # re-print the JSON file given on the command line as a single line 5 | with open(sys.argv[1], 'r') as f: 6 | d = json.loads(f.read()) 7 | print(json.dumps(d)) -------------------------------------------------------------------------------- /project_goes_here/.python-version: -------------------------------------------------------------------------------- 1 | 3.8.0 2 | -------------------------------------------------------------------------------- /project_goes_here/.user.yml: -------------------------------------------------------------------------------- 1 | id: 622092df-b5c2-44cc-802b-56cf453e98d6 2 | -------------------------------------------------------------------------------- /project_goes_here/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /project_goes_here/README.md: -------------------------------------------------------------------------------- 1 | ## Testing dbt project: `jaffle_shop` 2 | 3 | `jaffle_shop` is a fictional ecommerce store. This dbt project transforms raw data from an app database into a customers and orders model ready for analytics. 4 | 5 | ### What is this repo? 6 | What this repo _is_: 7 | - A self-contained playground dbt project, useful for testing out scripts and communicating some of the core dbt concepts. 8 | 9 | What this repo _is not_: 10 | - A tutorial — check out the [Getting Started Tutorial](https://docs.getdbt.com/tutorial/setting-up) for that. Notably, this repo contains some anti-patterns to make it self-contained, namely the use of seeds instead of sources. 11 | - A demonstration of best practices — check out the [dbt Learn Demo](https://github.com/dbt-labs/dbt-learn-demo) repo instead. We want to keep this project as simple as possible. As such, we chose not to implement: 12 | - our standard file naming patterns (which make more sense on larger projects, rather than this five-model project) 13 | - a pull request flow 14 | - CI/CD integrations 15 | - A demonstration of using dbt for a highly complex project, or a demo of advanced features (e.g. macros, packages, hooks, operations) — we're just trying to keep things simple here! 16 | 17 | ### What's in this repo? 18 | This repo contains [seeds](https://docs.getdbt.com/docs/building-a-dbt-project/seeds) that include some (fake) raw data from a fictional app. 19 | 20 | The raw data consists of customers, orders, and payments, with the following entity-relationship diagram: 21 | 22 | ![Jaffle Shop ERD](/etc/jaffle_shop_erd.png) 23 | 24 | 25 | ### Running this project 26 | To get up and running with this project: 27 | 1. Install dbt using [these instructions](https://docs.getdbt.com/docs/installation). 28 | 29 | 2. Clone this repository. 30 | 31 | 3. Change into the `jaffle_shop` directory from the command line: 32 | ```bash 33 | $ cd jaffle_shop 34 | ``` 35 | 36 | 4. Set up a profile called `jaffle_shop` to connect to a data warehouse by following [these instructions](https://docs.getdbt.com/docs/configure-your-profile). If you have access to a data warehouse, you can use those credentials – we recommend setting your [target schema](https://docs.getdbt.com/docs/configure-your-profile#section-populating-your-profile) to be a new schema (dbt will create the schema for you, as long as you have the right privileges). If you don't have access to an existing data warehouse, you can also set up a local Postgres database and connect to it in your profile. 37 | 38 | 5. Ensure your profile is set up correctly from the command line: 39 | ```bash 40 | $ dbt debug 41 | ``` 42 | 43 | 6. Load the CSVs with the demo data set. This materializes the CSVs as tables in your target schema. Note that a typical dbt project **does not require this step** since dbt assumes your raw data is already in your warehouse. 44 | ```bash 45 | $ dbt seed 46 | ``` 47 | 48 | 7. Run the models: 49 | ```bash 50 | $ dbt run 51 | ``` 52 | 53 | > **NOTE:** If this step fails, it might mean that you need to make small changes to the SQL in the models folder to adjust for your target database's flavor of SQL. Definitely consider this if you are using a community-contributed adapter. 54 | 55 | 8. Test the output of the models: 56 | ```bash 57 | $ dbt test 58 | ``` 59 | 60 | 9. 
Generate documentation for the project: 61 | ```bash 62 | $ dbt docs generate 63 | ``` 64 | 65 | 10. View the documentation for the project: 66 | ```bash 67 | $ dbt docs serve 68 | ``` 69 | 70 | ### What is a jaffle? 71 | A jaffle is a toasted sandwich with crimped, sealed edges. Invented in Bondi in 1949, the humble jaffle is an Australian classic. The sealed edges allow jaffle-eaters to enjoy liquid fillings inside the sandwich, which reach temperatures close to the core of the earth during cooking. Often consumed at home after a night out, the most classic filling is tinned spaghetti, while my personal favourite is leftover beef stew with melted cheese. 72 | 73 | --- 74 | For more information on dbt: 75 | - Read the [introduction to dbt](https://docs.getdbt.com/docs/introduction). 76 | - Read the [dbt viewpoint](https://docs.getdbt.com/docs/about/viewpoint). 77 | - Join the [dbt community](http://community.getdbt.com/). 78 | --- 79 | -------------------------------------------------------------------------------- /project_goes_here/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'jaffle_shop' 2 | 3 | config-version: 2 4 | version: '0.1' 5 | 6 | profile: 'jaffle_shop' 7 | 8 | model-paths: ["models"] 9 | seed-paths: ["seeds"] 10 | test-paths: ["tests"] 11 | analysis-paths: ["analysis"] 12 | macro-paths: ["macros"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | - "logs" 19 | 20 | require-dbt-version: [">=1.0.0", "<2.0.0"] 21 | 22 | models: 23 | jaffle_shop: 24 | materialized: table 25 | staging: 26 | materialized: view 27 | -------------------------------------------------------------------------------- /project_goes_here/etc/dbdiagram_definition.txt: -------------------------------------------------------------------------------- 1 | Table orders { 2 | id int PK 3 | user_id int 4 | order_date date 5 | status varchar 6 | } 7 | 8 | Table payments { 9 | id int 10 | order_id int 11 | payment_method int 12 | amount int 13 | } 14 | 15 | Table customers { 16 | id int PK 17 | first_name varchar 18 | last_name varchar 19 | } 20 | 21 | Ref: orders.user_id > customers.id 22 | 23 | Ref: payments.order_id > orders.id 24 | -------------------------------------------------------------------------------- /project_goes_here/etc/jaffle_shop_erd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C00ldudeNoonan/simple-dbt-runner/d836935cf0ab96a83c0d00b99e8dca04ae701b30/project_goes_here/etc/jaffle_shop_erd.png -------------------------------------------------------------------------------- /project_goes_here/macros/drop_orphanate_tables.sql: -------------------------------------------------------------------------------- 1 | -- Execute with: dbt run-operation drop_old_relations --args '{"dry_run": True}' 2 | -- to run the delete, run w/o the args 3 | -- this simplified version simply drops schemas that start with PR 4 | 5 | {% macro drop_old_relations(dry_run='false') %} 6 | {% set cleanup_query %} 7 | with models_to_drop as ( 8 | select 9 | distinct table_schema 10 | from 11 | {{ target.database }}.information_schema.tables 12 | where table_schema like 'PR%' 13 | ) 14 | select 15 | CONCAT( 'drop schema "' , table_schema , '" cascade;' ) as drop_commands 16 | from 17 | models_to_drop 18 | order by drop_commands desc 19 | {% endset %} 20 | {% do log(cleanup_query, info=True) %} 21 | {% set drop_commands = 
run_query(cleanup_query).columns[0].values() %} 22 | {% if drop_commands %} 23 | {% for drop_command in drop_commands %} 24 | {% do log(drop_command, info=True) %} 25 | {% if dry_run == 'false' %} 26 | {% do run_query(drop_command) %} 27 | {% endif %} 28 | {% endfor %} 29 | {% else %} 30 | {% do log('No relations to clean.', info=True) %} 31 | {% endif %} 32 | {%- endmacro -%} -------------------------------------------------------------------------------- /project_goes_here/models/customers.sql: -------------------------------------------------------------------------------- 1 | with customers as ( 2 | 3 | select * from {{ ref('stg_customers') }} 4 | 5 | ), 6 | 7 | orders as ( 8 | 9 | select * from {{ ref('stg_orders') }} 10 | 11 | ), 12 | 13 | payments as ( 14 | 15 | select * from {{ ref('stg_payments') }} 16 | 17 | ), 18 | 19 | customer_orders as ( 20 | 21 | select 22 | customer_id, 23 | 24 | min(order_date) as first_order, 25 | max(order_date) as most_recent_order, 26 | count(order_id) as number_of_orders 27 | from orders 28 | 29 | group by customer_id 30 | 31 | ), 32 | 33 | customer_payments as ( 34 | 35 | select 36 | orders.customer_id, 37 | sum(amount) as total_amount 38 | 39 | from payments 40 | 41 | left join orders on 42 | payments.order_id = orders.order_id 43 | 44 | group by orders.customer_id 45 | 46 | ), 47 | 48 | final as ( 49 | 50 | select 51 | customers.customer_id, 52 | customers.first_name, 53 | customers.last_name, 54 | customer_orders.first_order, 55 | customer_orders.most_recent_order, 56 | customer_orders.number_of_orders, 57 | customer_payments.total_amount as customer_lifetime_value 58 | 59 | from customers 60 | 61 | left join customer_orders 62 | on customers.customer_id = customer_orders.customer_id 63 | 64 | left join customer_payments 65 | on customers.customer_id = customer_payments.customer_id 66 | 67 | ) 68 | 69 | select * from final 70 | -------------------------------------------------------------------------------- /project_goes_here/models/docs.md: -------------------------------------------------------------------------------- 1 | {% docs orders_status %} 2 | 3 | Orders can be one of the following statuses: 4 | 5 | | status | description | 6 | |----------------|------------------------------------------------------------------------------------------------------------------------| 7 | | placed | The order has been placed but has not yet left the warehouse | 8 | | shipped | The order has been shipped to the customer and is currently in transit | 9 | | completed | The order has been received by the customer | 10 | | return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse | 11 | | returned | The order has been returned by the customer and received at the warehouse | 12 | 13 | 14 | {% enddocs %} 15 | -------------------------------------------------------------------------------- /project_goes_here/models/orders.sql: -------------------------------------------------------------------------------- 1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} 2 | 3 | with orders as ( 4 | 5 | select * from {{ ref('stg_orders') }} 6 | 7 | ), 8 | 9 | payments as ( 10 | 11 | select * from {{ ref('stg_payments') }} 12 | 13 | ), 14 | 15 | order_payments as ( 16 | 17 | select 18 | order_id, 19 | 20 | {% for payment_method in payment_methods -%} 21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, 22 | {% 
endfor -%} 23 | 24 | sum(amount) as total_amount 25 | 26 | from payments 27 | 28 | group by order_id 29 | 30 | ), 31 | 32 | final as ( 33 | 34 | select 35 | orders.order_id, 36 | orders.customer_id, 37 | orders.order_date, 38 | orders.status, 39 | 40 | {% for payment_method in payment_methods -%} 41 | 42 | order_payments.{{ payment_method }}_amount, 43 | 44 | {% endfor -%} 45 | 46 | order_payments.total_amount as amount 47 | 48 | from orders 49 | 50 | 51 | left join order_payments 52 | on orders.order_id = order_payments.order_id 53 | 54 | ) 55 | 56 | select * from final 57 | -------------------------------------------------------------------------------- /project_goes_here/models/overview.md: -------------------------------------------------------------------------------- 1 | {% docs __overview__ %} 2 | 3 | ## Data Documentation for Jaffle Shop 4 | 5 | `jaffle_shop` is a fictional ecommerce store. 6 | 7 | This [dbt](https://www.getdbt.com/) project is for testing out code. 8 | 9 | The source code can be found [here](https://github.com/clrcrl/jaffle_shop). 10 | 11 | {% enddocs %} 12 | -------------------------------------------------------------------------------- /project_goes_here/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: customers 5 | description: This table has basic information about a customer, as well as some derived facts based on a customer's orders 6 | 7 | columns: 8 | - name: customer_id 9 | description: This is a unique identifier for a customer 10 | tests: 11 | - unique 12 | - not_null 13 | 14 | - name: first_name 15 | description: Customer's first name. PII. 16 | 17 | - name: last_name 18 | description: Customer's last name. PII. 19 | 20 | - name: first_order 21 | description: Date (UTC) of a customer's first order 22 | 23 | - name: most_recent_order 24 | description: Date (UTC) of a customer's most recent order 25 | 26 | - name: number_of_orders 27 | description: Count of the number of orders a customer has placed 28 | 29 | - name: total_order_amount 30 | description: Total value (AUD) of a customer's orders 31 | 32 | - name: orders 33 | description: This table has basic information about orders, as well as some derived facts based on payments 34 | 35 | columns: 36 | - name: order_id 37 | tests: 38 | - unique 39 | - not_null 40 | description: This is a unique identifier for an order 41 | 42 | - name: customer_id 43 | description: Foreign key to the customers table 44 | tests: 45 | - not_null 46 | - relationships: 47 | to: ref('customers') 48 | field: customer_id 49 | 50 | - name: order_date 51 | description: Date (UTC) that the order was placed 52 | 53 | - name: status 54 | description: '{{ doc("orders_status") }}' 55 | tests: 56 | - accepted_values: 57 | values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] 58 | 59 | - name: amount 60 | description: Total amount (AUD) of the order 61 | tests: 62 | - not_null 63 | 64 | - name: credit_card_amount 65 | description: Amount of the order (AUD) paid for by credit card 66 | tests: 67 | - not_null 68 | 69 | - name: coupon_amount 70 | description: Amount of the order (AUD) paid for by coupon 71 | tests: 72 | - not_null 73 | 74 | - name: bank_transfer_amount 75 | description: Amount of the order (AUD) paid for by bank transfer 76 | tests: 77 | - not_null 78 | 79 | - name: gift_card_amount 80 | description: Amount of the order (AUD) paid for by gift card 81 | tests: 82 | - not_null 83 | 
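# a quick way to exercise just the tests declared in this file (an example
# invocation, assuming you run it from inside project_goes_here):
#   dbt test --select customers orders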
-------------------------------------------------------------------------------- /project_goes_here/models/staging/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_customers 5 | columns: 6 | - name: customer_id 7 | tests: 8 | - unique 9 | - not_null 10 | 11 | - name: stg_orders 12 | columns: 13 | - name: order_id 14 | tests: 15 | - unique 16 | - not_null 17 | - name: status 18 | tests: 19 | - accepted_values: 20 | values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] 21 | 22 | - name: stg_payments 23 | columns: 24 | - name: payment_id 25 | tests: 26 | - unique 27 | - not_null 28 | - name: payment_method 29 | tests: 30 | - accepted_values: 31 | values: ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] 32 | -------------------------------------------------------------------------------- /project_goes_here/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ ref('raw_customers') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as customer_id, 15 | first_name, 16 | last_name, 17 | 'test' as test_col 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /project_goes_here/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ ref('raw_orders') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as order_id, 15 | user_id as customer_id, 16 | order_date, 17 | status, 18 | 'test' as test_col 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /project_goes_here/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ ref('raw_payments') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as payment_id, 15 | order_id, 16 | payment_method, 17 | 18 | -- `amount` is currently stored in cents, so we convert it to dollars 19 | amount / 100 as amount 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /project_goes_here/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: "<=2.0.0" 4 | -------------------------------------------------------------------------------- /project_goes_here/profiles.yml: -------------------------------------------------------------------------------- 1 | jaffle_shop: 2 | target: prod 3 | outputs: 4 | prod: 5 | type: bigquery 6 | method: service-account 7 | project: '{{ env_var("PROJECT_NAME") }}' 8 | schema: '{{ env_var("DATASET") }}' 9 | threads: 4 10 | keyfile: ../google-key.json 11 | timeout_seconds: 600 12 | location: US # Optional, one of US or EU 13 | priority: 
interactive 14 | retries: 1 15 | dev: 16 | type: bigquery 17 | method: service-account 18 | project: '{{ env_var("PROJECT_NAME") }}' 19 | schema: '{{ env_var("DATASET") }}' 20 | threads: 4 21 | keyfile: ../google-key.json 22 | timeout_seconds: 600 23 | location: US # Optional, one of US or EU 24 | priority: interactive 25 | retries: 1 26 | -------------------------------------------------------------------------------- /project_goes_here/seeds/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C00ldudeNoonan/simple-dbt-runner/d836935cf0ab96a83c0d00b99e8dca04ae701b30/project_goes_here/seeds/.gitkeep -------------------------------------------------------------------------------- /project_goes_here/seeds/raw_customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name 2 | 1,Michael,P. 3 | 2,Shawn,M. 4 | 3,Kathleen,P. 5 | 4,Jimmy,C. 6 | 5,Katherine,R. 7 | 6,Sarah,R. 8 | 7,Martin,M. 9 | 8,Frank,R. 10 | 9,Jennifer,F. 11 | 10,Henry,W. 12 | 11,Fred,S. 13 | 12,Amy,D. 14 | 13,Kathleen,M. 15 | 14,Steve,F. 16 | 15,Teresa,H. 17 | 16,Amanda,H. 18 | 17,Kimberly,R. 19 | 18,Johnny,K. 20 | 19,Virginia,F. 21 | 20,Anna,A. 22 | 21,Willie,H. 23 | 22,Sean,H. 24 | 23,Mildred,A. 25 | 24,David,G. 26 | 25,Victor,H. 27 | 26,Aaron,R. 28 | 27,Benjamin,B. 29 | 28,Lisa,W. 30 | 29,Benjamin,K. 31 | 30,Christina,W. 32 | 31,Jane,G. 33 | 32,Thomas,O. 34 | 33,Katherine,M. 35 | 34,Jennifer,S. 36 | 35,Sara,T. 37 | 36,Harold,O. 38 | 37,Shirley,J. 39 | 38,Dennis,J. 40 | 39,Louise,W. 41 | 40,Maria,A. 42 | 41,Gloria,C. 43 | 42,Diana,S. 44 | 43,Kelly,N. 45 | 44,Jane,R. 46 | 45,Scott,B. 47 | 46,Norma,C. 48 | 47,Marie,P. 49 | 48,Lillian,C. 50 | 49,Judy,N. 51 | 50,Billy,L. 52 | 51,Howard,R. 53 | 52,Laura,F. 54 | 53,Anne,B. 55 | 54,Rose,M. 56 | 55,Nicholas,R. 57 | 56,Joshua,K. 58 | 57,Paul,W. 59 | 58,Kathryn,K. 60 | 59,Adam,A. 61 | 60,Norma,W. 62 | 61,Timothy,R. 63 | 62,Elizabeth,P. 64 | 63,Edward,G. 65 | 64,David,C. 66 | 65,Brenda,W. 67 | 66,Adam,W. 68 | 67,Michael,H. 69 | 68,Jesse,E. 70 | 69,Janet,P. 71 | 70,Helen,F. 72 | 71,Gerald,C. 73 | 72,Kathryn,O. 74 | 73,Alan,B. 75 | 74,Harry,A. 76 | 75,Andrea,H. 77 | 76,Barbara,W. 78 | 77,Anne,W. 79 | 78,Harry,H. 80 | 79,Jack,R. 81 | 80,Phillip,H. 82 | 81,Shirley,H. 83 | 82,Arthur,D. 84 | 83,Virginia,R. 85 | 84,Christina,R. 86 | 85,Theresa,M. 87 | 86,Jason,C. 88 | 87,Phillip,B. 89 | 88,Adam,T. 90 | 89,Margaret,J. 91 | 90,Paul,P. 92 | 91,Todd,W. 93 | 92,Willie,O. 94 | 93,Frances,R. 95 | 94,Gregory,H. 96 | 95,Lisa,P. 97 | 96,Jacqueline,A. 98 | 97,Shirley,D. 99 | 98,Nicole,M. 100 | 99,Mary,G. 101 | 100,Jean,M. 
102 | -------------------------------------------------------------------------------- /project_goes_here/seeds/raw_orders.csv: -------------------------------------------------------------------------------- 1 | id,user_id,order_date,status 2 | 1,1,2018-01-01,returned 3 | 2,3,2018-01-02,completed 4 | 3,94,2018-01-04,completed 5 | 4,50,2018-01-05,completed 6 | 5,64,2018-01-05,completed 7 | 6,54,2018-01-07,completed 8 | 7,88,2018-01-09,completed 9 | 8,2,2018-01-11,returned 10 | 9,53,2018-01-12,completed 11 | 10,7,2018-01-14,completed 12 | 11,99,2018-01-14,completed 13 | 12,59,2018-01-15,completed 14 | 13,84,2018-01-17,completed 15 | 14,40,2018-01-17,returned 16 | 15,25,2018-01-17,completed 17 | 16,39,2018-01-18,completed 18 | 17,71,2018-01-18,completed 19 | 18,64,2018-01-20,returned 20 | 19,54,2018-01-22,completed 21 | 20,20,2018-01-23,completed 22 | 21,71,2018-01-23,completed 23 | 22,86,2018-01-24,completed 24 | 23,22,2018-01-26,return_pending 25 | 24,3,2018-01-27,completed 26 | 25,51,2018-01-28,completed 27 | 26,32,2018-01-28,completed 28 | 27,94,2018-01-29,completed 29 | 28,8,2018-01-29,completed 30 | 29,57,2018-01-31,completed 31 | 30,69,2018-02-02,completed 32 | 31,16,2018-02-02,completed 33 | 32,28,2018-02-04,completed 34 | 33,42,2018-02-04,completed 35 | 34,38,2018-02-06,completed 36 | 35,80,2018-02-08,completed 37 | 36,85,2018-02-10,completed 38 | 37,1,2018-02-10,completed 39 | 38,51,2018-02-10,completed 40 | 39,26,2018-02-11,completed 41 | 40,33,2018-02-13,completed 42 | 41,99,2018-02-14,completed 43 | 42,92,2018-02-16,completed 44 | 43,31,2018-02-17,completed 45 | 44,66,2018-02-17,completed 46 | 45,22,2018-02-17,completed 47 | 46,6,2018-02-19,completed 48 | 47,50,2018-02-20,completed 49 | 48,27,2018-02-21,completed 50 | 49,35,2018-02-21,completed 51 | 50,51,2018-02-23,completed 52 | 51,71,2018-02-24,completed 53 | 52,54,2018-02-25,return_pending 54 | 53,34,2018-02-26,completed 55 | 54,54,2018-02-26,completed 56 | 55,18,2018-02-27,completed 57 | 56,79,2018-02-28,completed 58 | 57,93,2018-03-01,completed 59 | 58,22,2018-03-01,completed 60 | 59,30,2018-03-02,completed 61 | 60,12,2018-03-03,completed 62 | 61,63,2018-03-03,completed 63 | 62,57,2018-03-05,completed 64 | 63,70,2018-03-06,completed 65 | 64,13,2018-03-07,completed 66 | 65,26,2018-03-08,completed 67 | 66,36,2018-03-10,completed 68 | 67,79,2018-03-11,completed 69 | 68,53,2018-03-11,completed 70 | 69,3,2018-03-11,completed 71 | 70,8,2018-03-12,completed 72 | 71,42,2018-03-12,shipped 73 | 72,30,2018-03-14,shipped 74 | 73,19,2018-03-16,completed 75 | 74,9,2018-03-17,shipped 76 | 75,69,2018-03-18,completed 77 | 76,25,2018-03-20,completed 78 | 77,35,2018-03-21,shipped 79 | 78,90,2018-03-23,shipped 80 | 79,52,2018-03-23,shipped 81 | 80,11,2018-03-23,shipped 82 | 81,76,2018-03-23,shipped 83 | 82,46,2018-03-24,shipped 84 | 83,54,2018-03-24,shipped 85 | 84,70,2018-03-26,placed 86 | 85,47,2018-03-26,shipped 87 | 86,68,2018-03-26,placed 88 | 87,46,2018-03-27,placed 89 | 88,91,2018-03-27,shipped 90 | 89,21,2018-03-28,placed 91 | 90,66,2018-03-30,shipped 92 | 91,47,2018-03-31,placed 93 | 92,84,2018-04-02,placed 94 | 93,66,2018-04-03,placed 95 | 94,63,2018-04-03,placed 96 | 95,27,2018-04-04,placed 97 | 96,90,2018-04-06,placed 98 | 97,89,2018-04-07,placed 99 | 98,41,2018-04-07,placed 100 | 99,85,2018-04-09,placed 101 | -------------------------------------------------------------------------------- /project_goes_here/seeds/raw_payments.csv: -------------------------------------------------------------------------------- 1 | 
id,order_id,payment_method,amount 2 | 1,1,credit_card,1000 3 | 2,2,credit_card,2000 4 | 3,3,coupon,100 5 | 4,4,coupon,2500 6 | 5,5,bank_transfer,1700 7 | 6,6,credit_card,600 8 | 7,7,credit_card,1600 9 | 8,8,credit_card,2300 10 | 9,9,gift_card,2300 11 | 10,9,bank_transfer,0 12 | 11,10,bank_transfer,2600 13 | 12,11,credit_card,2700 14 | 13,12,credit_card,100 15 | 14,13,credit_card,500 16 | 15,13,bank_transfer,1400 17 | 16,14,bank_transfer,300 18 | 17,15,coupon,2200 19 | 18,16,credit_card,1000 20 | 19,17,bank_transfer,200 21 | 20,18,credit_card,500 22 | 21,18,credit_card,800 23 | 22,19,gift_card,600 24 | 23,20,bank_transfer,1500 25 | 24,21,credit_card,1200 26 | 25,22,bank_transfer,800 27 | 26,23,gift_card,2300 28 | 27,24,coupon,2600 29 | 28,25,bank_transfer,2000 30 | 29,25,credit_card,2200 31 | 30,25,coupon,1600 32 | 31,26,credit_card,3000 33 | 32,27,credit_card,2300 34 | 33,28,bank_transfer,1900 35 | 34,29,bank_transfer,1200 36 | 35,30,credit_card,1300 37 | 36,31,credit_card,1200 38 | 37,32,credit_card,300 39 | 38,33,credit_card,2200 40 | 39,34,bank_transfer,1500 41 | 40,35,credit_card,2900 42 | 41,36,bank_transfer,900 43 | 42,37,credit_card,2300 44 | 43,38,credit_card,1500 45 | 44,39,bank_transfer,800 46 | 45,40,credit_card,1400 47 | 46,41,credit_card,1700 48 | 47,42,coupon,1700 49 | 48,43,gift_card,1800 50 | 49,44,gift_card,1100 51 | 50,45,bank_transfer,500 52 | 51,46,bank_transfer,800 53 | 52,47,credit_card,2200 54 | 53,48,bank_transfer,300 55 | 54,49,credit_card,600 56 | 55,49,credit_card,900 57 | 56,50,credit_card,2600 58 | 57,51,credit_card,2900 59 | 58,51,credit_card,100 60 | 59,52,bank_transfer,1500 61 | 60,53,credit_card,300 62 | 61,54,credit_card,1800 63 | 62,54,bank_transfer,1100 64 | 63,55,credit_card,2900 65 | 64,56,credit_card,400 66 | 65,57,bank_transfer,200 67 | 66,58,coupon,1800 68 | 67,58,gift_card,600 69 | 68,59,gift_card,2800 70 | 69,60,credit_card,400 71 | 70,61,bank_transfer,1600 72 | 71,62,gift_card,1400 73 | 72,63,credit_card,2900 74 | 73,64,bank_transfer,2600 75 | 74,65,credit_card,0 76 | 75,66,credit_card,2800 77 | 76,67,bank_transfer,400 78 | 77,67,credit_card,1900 79 | 78,68,credit_card,1600 80 | 79,69,credit_card,1900 81 | 80,70,credit_card,2600 82 | 81,71,credit_card,500 83 | 82,72,credit_card,2900 84 | 83,73,bank_transfer,300 85 | 84,74,credit_card,3000 86 | 85,75,credit_card,1900 87 | 86,76,coupon,200 88 | 87,77,credit_card,0 89 | 88,77,bank_transfer,1900 90 | 89,78,bank_transfer,2600 91 | 90,79,credit_card,1800 92 | 91,79,credit_card,900 93 | 92,80,gift_card,300 94 | 93,81,coupon,200 95 | 94,82,credit_card,800 96 | 95,83,credit_card,100 97 | 96,84,bank_transfer,2500 98 | 97,85,bank_transfer,1700 99 | 98,86,coupon,2300 100 | 99,87,gift_card,3000 101 | 100,87,credit_card,2600 102 | 101,88,credit_card,2900 103 | 102,89,bank_transfer,2200 104 | 103,90,bank_transfer,200 105 | 104,91,credit_card,1900 106 | 105,92,bank_transfer,1500 107 | 106,92,coupon,200 108 | 107,93,gift_card,2600 109 | 108,94,coupon,700 110 | 109,95,coupon,2400 111 | 110,96,gift_card,1700 112 | 111,97,bank_transfer,1400 113 | 112,98,bank_transfer,1000 114 | 113,99,credit_card,2400 115 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ghp-import==2.1.0 2 | ruamel.yaml==0.17.32 3 | # Put whatever adapter you are using here here 4 | dbt-bigquery==1.6.3 -------------------------------------------------------------------------------- /save_and_publish_docs.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # create the docs directory if it doesn't exist 4 | if [ ! -d "docs" ]; then 5 | mkdir docs 6 | fi 7 | 8 | # copy the manifest.json, catalog.json, and index.html files to the docs directory 9 | cp project_goes_here/target/manifest.json docs/manifest.json 10 | cp project_goes_here/target/catalog.json docs/catalog.json 11 | cp project_goes_here/target/index.html docs/index.html 12 | 13 | # publish the docs directory to GitHub Pages using ghp-import 14 | ghp-import -n -p -f docs/ -------------------------------------------------------------------------------- /update_profile_with_prod.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import ruamel.yaml 3 | 4 | yaml = ruamel.yaml.YAML() 5 | yaml.preserve_quotes = True 6 | 7 | workflow_jobs = { 8 | 'run_dbt.yml': 'run_dbt', 9 | 'run_dbt_force.yml': 'run_dbt', 10 | 'run_dbt_on_cron.yml': 'dbt_scheduled_run', 11 | 'run_incremental_dbt_on_merge.yml': 'dbt_run_on_merge_incremental', 12 | 'run_dbt_on_pr.yml' : 'dbt_run_on_pr', 13 | 'run_dbt_cleanup.yml' : 'dbt_scheduled_cleanup_run' 14 | } 15 | 16 | # Load the profiles.yml file 17 | with open('project_goes_here/profiles.yml', 'r') as f: 18 | profiles = yaml.load(f) 19 | 20 | # get the profile name from dbt_project.yml 21 | with open('project_goes_here/dbt_project.yml', 'r') as f: 22 | dbt_project = yaml.load(f) 23 | profile_name = dbt_project['profile'] 24 | 25 | # Get the type of datawarehouse we are using 26 | dwh_type = sys.argv[1] 27 | print("dwh_type: ", dwh_type) 28 | 29 | # Add a new target called "prod" based on the type of datawarehouse 30 | if dwh_type == 'postgres': 31 | profiles[profile_name]['outputs']['prod'] = { 32 | 'type': 'postgres', 33 | 'host': '{{ env_var("HOST") }}', 34 | 'user': '{{ env_var("USERNAME") }}', 35 | 'password': '{{ env_var("PASSWORD") }}', 36 | 'port': '{{ env_var("PORT") | int }}', 37 | 'dbname': '{{ env_var("DATABASE") }}', 38 | 'schema': '{{ env_var("SCHEMA") }}', 39 | 'threads': 8 40 | } 41 | 42 | # update the gh actions jobs 43 | for file, job in workflow_jobs.items(): 44 | with open('.github/workflows/'+file, 'r') as f: 45 | data = yaml.load(f) 46 | 47 | # edit the env block for all the jobs 48 | data['jobs'][job]['env'] = { 49 | 'HOST': '${{ secrets.HOST }}', 50 | 'DATABASE': '${{ secrets.DATABASE }}', 51 | 'USERNAME': '${{ secrets.USERNAME }}', 52 | 'PASSWORD': '${{ secrets.PASSWORD }}', 53 | 'PORT': '${{ secrets.PORT }}', 54 | 'SCHEMA': '${{ secrets.SCHEMA }}' 55 | } 56 | 57 | with open('.github/workflows/'+file, 'w') as f: 58 | yaml.dump(data, f) 59 | 60 | elif dwh_type == 'snowflake': 61 | profiles[profile_name]['outputs']['prod'] = { 62 | 'type': 'snowflake', 63 | 'account': '{{ env_var("SNOWFLAKE_ACCOUNT") }}', 64 | 'role': '{{ env_var("SNOWFLAKE_ROLE") }}', 65 | 'user': '{{ env_var("USERNAME") }}', 66 | 'password': '{{ env_var("PASSWORD") }}', 67 | 'database': '{{ env_var("DATABASE") }}', 68 | 'schema': '{{ env_var("SCHEMA") }}', 69 | 'warehouse': '{{ env_var("WAREHOUSE") }}', 70 | 'threads': 8 71 | } 72 | 73 | # update the gh actions jobs 74 | for file, job in workflow_jobs.items(): 75 | with open('.github/workflows/'+file, 'r') as f: 76 | data = yaml.load(f) 77 | 78 | # edit the env block for all the jobs 79 | data['jobs'][job]['env'] = { 80 | 'SNOWFLAKE_ACCOUNT': '${{ secrets.SNOWFLAKE_ACCOUNT }}', 81 | 'DATABASE': '${{ secrets.DATABASE }}', 82 | 'SNOWFLAKE_ROLE': '${{ secrets.SNOWFLAKE_ROLE 
}}', 83 | 'USERNAME': '${{ secrets.USERNAME }}', 84 | 'PASSWORD': '${{ secrets.PASSWORD }}', 85 | 'SCHEMA': '${{ secrets.SCHEMA }}', 86 | 'WAREHOUSE': '${{ secrets.WAREHOUSE }}', 87 | } 88 | 89 | with open('.github/workflows/'+file, 'w') as f: 90 | yaml.dump(data, f) 91 | 92 | elif dwh_type == 'redshift': 93 | profiles[profile_name]['outputs']['prod'] = { 94 | 'type': 'redshift', 95 | 'host': '{{ env_var("HOST") }}', 96 | 'user': '{{ env_var("USERNAME") }}', 97 | 'password': '{{ env_var("PASSWORD") }}', 98 | 'port': '{{ env_var("PORT") | int }}', 99 | 'dbname': '{{ env_var("DATABASE") }}', 100 | 'schema': '{{ env_var("SCHEMA") }}', 101 | 'threads': 8 102 | } 103 | 104 | # update the gh actions jobs 105 | for file, job in workflow_jobs.items(): 106 | with open('.github/workflows/'+file, 'r') as f: 107 | data = yaml.load(f) 108 | 109 | # edit the env block for all the jobs 110 | data['jobs'][job]['env'] = { 111 | 'HOST': '${{ secrets.HOST }}', 112 | 'USERNAME': '${{ secrets.USERNAME }}', 113 | 'PASSWORD': '${{ secrets.PASSWORD }}', 114 | 'PORT': '${{ secrets.PORT }}', 115 | 'DATABASE': '${{ secrets.DATABASE }}', 116 | 'SCHEMA': '${{ secrets.SCHEMA }}', 117 | } 118 | 119 | with open('.github/workflows/'+file, 'w') as f: 120 | yaml.dump(data, f) 121 | 122 | elif dwh_type == 'bigquery': 123 | profiles[profile_name]['outputs']['prod'] = { 124 | 'type': 'bigquery', 125 | # match the CI workflows, which write the service-account key to 126 | # google-key.json (oauth has no user to authenticate on a runner) 127 | 'method': 'service-account', 128 | 'keyfile': '../google-key.json', 129 | 'project': '{{ env_var("PROJECT_NAME") }}', 130 | 'dataset': '{{ env_var("DATASET") }}', 131 | 'threads': 8 132 | } 133 | 134 | 135 | # update the gh actions jobs 136 | for file, job in workflow_jobs.items(): 137 | with open('.github/workflows/'+file, 'r') as f: 138 | data = yaml.load(f) 139 | 140 | # edit the env block for all the jobs 141 | data['jobs'][job]['env'] = { 142 | 'PROJECT_NAME': '${{ secrets.PROJECT_NAME }}', 143 | 'DATASET': '${{ secrets.DATASET }}', 144 | } 145 | 146 | with open('.github/workflows/'+file, 'w') as f: 147 | yaml.dump(data, f) 148 | 149 | # Save the updated profiles.yml file 150 | with open('project_goes_here/profiles.yml', 'w') as f: 151 | yaml.dump(profiles, f) 152 | 153 | # update requirements.txt with the dbt adapter depending on the type of datawarehouse 154 | if dwh_type == 'postgres': 155 | with open('requirements.txt', 'a') as f: 156 | f.write("\ndbt-postgres==1.5.2") 157 | elif dwh_type == 'snowflake': 158 | with open('requirements.txt', 'a') as f: 159 | f.write("\ndbt-snowflake==1.5.3") 160 | elif dwh_type == 'redshift': 161 | with open('requirements.txt', 'a') as f: 162 | f.write("\ndbt-redshift==1.6.1") 163 | elif dwh_type == 'bigquery': 164 | with open('requirements.txt', 'a') as f: 165 | f.write("\ndbt-bigquery==1.6.3") 166 | --------------------------------------------------------------------------------