├── .github ├── pull_request_template.md └── workflows │ ├── autoscaling_ci.yml │ ├── dbt_cloud_webhooks.yml │ ├── dependent_jobs.yml │ ├── erd.yml │ ├── lint_sql.yml │ ├── multiple_ci.yml │ ├── pre_commit.yml │ ├── restart_from_failure.yml │ ├── run_and_log.yml │ ├── run_job_on_merge.yml │ └── tableau.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .sqlfluff ├── .vscode └── launch.json ├── README.md ├── analyses ├── analysis.yml └── customer_snapshot_query.sql ├── assets ├── README.md └── tpch_erd.png ├── dbt_project.yml ├── macros ├── block_on_tests.sql ├── cents_to_dollars.sql ├── convert_money.sql ├── create_udfs.sql ├── drop_ci_schemas.sql ├── drop_old_nodes.sql ├── generate_base_models.sql ├── generate_models_yaml.sql ├── grant_all_on_schemas.sql ├── limit_data_in_dev.sql ├── macro.yml ├── omit_soft_deletions.sql ├── select_extremes.sql ├── snowflake │ ├── dont_do_this.sql │ ├── materializations │ │ └── incremental_custom.sql │ └── share_view.sql ├── standard_account_fields.sql ├── test_all_values_gte_zero.sql ├── udf_area_of_circle.sql └── unit_test_mode.sql ├── models ├── demo_examples │ ├── demo_examples.yml │ ├── external_sources.yml │ ├── materialization_incremental.sql │ └── use_variables.sql ├── marts │ ├── aggregates │ │ ├── agg_ship_modes_dynamic_pivot.sql │ │ ├── agg_ship_modes_hardcoded_pivot.sql │ │ ├── aggregates.yml │ │ └── exposures.yml │ ├── core │ │ ├── core.yml │ │ ├── dim_customers.sql │ │ ├── dim_parts.sql │ │ ├── dim_suppliers.sql │ │ ├── fct_order_items.sql │ │ ├── fct_orders.sql │ │ ├── fct_orders_stats_py.py │ │ └── fct_orders_stats_sql.sql │ ├── intermediate │ │ ├── intermediate.md │ │ ├── intermediate.yml │ │ ├── order_items.sql │ │ └── part_suppliers.sql │ └── marketing │ │ ├── _models.yml │ │ ├── int_segment__link_clicked.sql │ │ ├── int_segment__pages.sql │ │ └── int_segment__tracks.sql ├── metrics │ ├── revenue_weekly_by_ship_mode.sql │ └── tpch_metrics.yml ├── ml │ ├── forecast_score_py.py │ └── forecast_train_py.py ├── 
overview.md ├── staging │ ├── segment │ │ ├── _segment__sources.yml │ │ ├── dbtc │ │ │ ├── stg_dbtc__link_clicked.sql │ │ │ ├── stg_dbtc__pages.sql │ │ │ └── stg_dbtc__tracks.sql │ │ └── yahooquery │ │ │ ├── stg_yahooquery__link_clicked.sql │ │ │ ├── stg_yahooquery__pages.sql │ │ │ └── stg_yahooquery__tracks.sql │ └── tpch │ │ ├── _tpch__docs.md │ │ ├── _tpch__models.yml │ │ ├── _tpch__sources.yml │ │ ├── stg_tpch_customers.sql │ │ ├── stg_tpch_line_items.sql │ │ ├── stg_tpch_nations.sql │ │ ├── stg_tpch_orders.sql │ │ ├── stg_tpch_part_suppliers.sql │ │ ├── stg_tpch_parts.sql │ │ ├── stg_tpch_regions.sql │ │ └── stg_tpch_suppliers.sql ├── test_ads │ ├── fct_facebook_ads.sql │ └── fct_instagram_ads.sql └── utils │ ├── all_days.sql │ └── utils.yml ├── packages.yml ├── prod └── manifest.json ├── requirements.txt ├── scripts ├── clone_databases.py ├── create_profile.py ├── multiple_ci.py ├── run_and_erd.py └── run_and_log.py ├── seeds ├── country_codes.csv ├── data.yml └── snowflake_contract_rates.csv ├── snapshots └── tpch │ ├── tpch_customer_snapshot.sql │ ├── tpch_part_snapshot.sql │ └── tpch_supplier_snapshot.sql └── tests ├── macro_stg_tpch_orders_assert_pos_price.sql ├── macro_stg_tphc_suppliers_assert_pos_acct_bal.sql └── stg_tpch_orders_assert_positive_price.sql /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ## Description & motivation 9 | 13 | 14 | ## Screenshots: 15 | 19 | 20 | ## Validation of models: 21 | 26 | 27 | ## Changes to existing models: 28 | 33 | 34 | ## Checklist: 35 | 41 | - [ ] My pull request represents one logical piece of work. 42 | - [ ] My commits are related to the pull request and look clean. 43 | - [ ] My SQL follows the [Fishtown Analytics style guide](https://github.com/fishtown-analytics/corp/blob/master/dbt_coding_conventions.md). 44 | - [ ] I have materialized my models appropriately. 
45 | - [ ] I have added appropriate tests and documentation to any new models. 46 | - [ ] I have updated the README file. -------------------------------------------------------------------------------- /.github/workflows/autoscaling_ci.yml: -------------------------------------------------------------------------------- 1 | name: Autoscaling dbt Cloud CI 2 | on: 3 | workflow_dispatch: 4 | # pull_request: 5 | # branches: 6 | # - main 7 | 8 | jobs: 9 | autoscaling: 10 | if: github.event.pull_request.draft == false 11 | runs-on: ubuntu-latest 12 | env: 13 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 14 | DBT_CLOUD_ACCOUNT_ID: 43786 15 | JOB_ID: 73797 16 | PULL_REQUEST_ID: ${{ github.event.number }} 17 | GIT_SHA: ${{ github.event.pull_request.head.sha }} 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: actions/setup-python@v2 22 | with: 23 | python-version: "3.9.x" 24 | 25 | - name: Trigger Autoscaling CI Job 26 | run: | 27 | pip install dbtc==0.3.6 28 | SO="dbt_cloud_pr_"$JOB_ID"_"$PULL_REQUEST_ID 29 | run=$(dbtc trigger-autoscaling-ci-job \ 30 | --job-id="$JOB_ID" \ 31 | --payload='{"cause": "Autoscaling Slim CI!","git_sha":"'"$GIT_SHA"'","schema_override":"'"$SO"'","github_pull_request_id":'"$PULL_REQUEST_ID"'}' \ 32 | --no-should-poll) 33 | echo $run 34 | -------------------------------------------------------------------------------- /.github/workflows/dbt_cloud_webhooks.yml: -------------------------------------------------------------------------------- 1 | on: 2 | repository_dispatch: 3 | 4 | jobs: 5 | run_if_success: 6 | # if: ${{ github.event.run.fields.runStatus == '10' }} 7 | runs-on: ubuntu-latest 8 | steps: 9 | - env: 10 | MESSAGE: ${{ github.event.run.fields }} 11 | run: echo $MESSAGE 12 | -------------------------------------------------------------------------------- /.github/workflows/dependent_jobs.yml: -------------------------------------------------------------------------------- 1 | name: Dependent Jobs 2 | on: 
3 | workflow_dispatch: 4 | # schedule: 5 | # - cron: "* * 0 0 0" 6 | 7 | jobs: 8 | dependent_jobs: 9 | runs-on: ubuntu-latest 10 | env: 11 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 12 | DBT_CLOUD_ACCOUNT_ID: 43786 13 | JOB_1_ID: 73796 14 | JOB_2_ID: 93955 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: actions/setup-python@v2 18 | with: 19 | python-version: "3.9.x" 20 | 21 | - name: Trigger Job 1 22 | run: | 23 | pip install dbtc==0.3.4 24 | run=$(dbtc trigger-job \ 25 | --job-id=$JOB_1_ID \ 26 | --payload='{"cause": "GH Action - Dependent Jobs - Job 1"}') 27 | echo "STATUS=$(echo $run | jq '.data.status')" >> $GITHUB_ENV 28 | 29 | - name: Trigger Job 2 30 | if: env.STATUS == 10 31 | run: | 32 | dbtc trigger-job \ 33 | --job-id=$JOB_2_ID \ 34 | --payload='{"cause": "GH Action - Dependent Jobs - Job 2"}' \ 35 | --no-should-poll 36 | -------------------------------------------------------------------------------- /.github/workflows/erd.yml: -------------------------------------------------------------------------------- 1 | name: Create ERD 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | run_and_erd: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | 12 | - uses: actions/setup-python@v2 13 | with: 14 | python-version: "3.9.x" 15 | 16 | - name: Install graphviz 17 | run: sudo apt install graphviz-dev graphviz 18 | 19 | - name: Install dependencies 20 | run: pip install -r requirements.txt 21 | 22 | - name: Run Python 23 | run: DBT_CLOUD_JOB_ID=93955 DBT_CLOUD_ACCOUNT_ID=43786 python scripts/run_and_erd.py 24 | env: 25 | SF_ACCOUNT: ${{ secrets.SF_ACCOUNT }} 26 | SF_USER: ${{ secrets.SF_USER }} 27 | SF_PASSWORD: ${{ secrets.SF_PASSWORD }} 28 | DBT_CLOUD_API_KEY: ${{ secrets.DBT_CLOUD_API_KEY }} 29 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 30 | 31 | - name: Add file 32 | uses: EndBug/add-and-commit@v9 33 | with: 34 | add: '*.png' 35 | committer_name: Github Actions 36 | 
committer_email: actions@github.com 37 | message: 'Add ERDs' 38 | 39 | -------------------------------------------------------------------------------- /.github/workflows/lint_sql.yml: -------------------------------------------------------------------------------- 1 | name: sqlfluff with reviewdog 2 | on: 3 | workflow_dispatch: 4 | # pull_request: 5 | jobs: 6 | test-check: 7 | name: runner / sqlfluff (github-check) 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: yu-iskw/action-sqlfluff@v3 12 | id: lint-sql 13 | with: 14 | github_token: ${{ secrets.github_token }} 15 | reporter: github-pr-review 16 | sqlfluff_version: '0.13.1' 17 | sqlfluff_command: 'fix' # Or "lint" 18 | config: '${{ github.workspace }}/.sqlfluff' 19 | paths: '${{ github.workspace }}/models' 20 | dialect: 'snowflake' 21 | - name: 'Show outputs (Optional)' 22 | shell: bash 23 | run: | 24 | echo '${{ steps.lint-sql.outputs.sqlfluff-results }}' | jq -r '.' 25 | echo '${{ steps.lint-sql.outputs.sqlfluff-results-rdjson }}' | jq -r '.' 
-------------------------------------------------------------------------------- /.github/workflows/multiple_ci.yml: -------------------------------------------------------------------------------- 1 | name: Multiple CI Run 2 | on: 3 | workflow_dispatch: 4 | # pull_request: 5 | # branches: 6 | # - main 7 | # types: 8 | # - opened 9 | # - reopened 10 | # - synchronize 11 | # - ready_for_review 12 | 13 | jobs: 14 | multiple_ci: 15 | if: github.event.pull_request.draft == false 16 | runs-on: ubuntu-latest 17 | env: 18 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 19 | DBT_CLOUD_ACCOUNT_ID: 43786 20 | PULL_REQUEST_ID: ${{ github.event.number }} 21 | GIT_SHA: ${{ github.event.pull_request.head.sha }} 22 | GITHUB_TOKEN: ${{ github.token }} 23 | PR_COMMENT_URL: ${{ github.event.pull_request._links.comments.href }} 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | - uses: actions/setup-python@v2 28 | with: 29 | python-version: "3.9.x" 30 | 31 | - name: Run Python Script 32 | run: | 33 | pip install dbtc==0.3.3 34 | python scripts/multiple_ci.py 35 | -------------------------------------------------------------------------------- /.github/workflows/pre_commit.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit-dbt 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | pre_commit_dbt: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: checkout 14 | uses: actions/checkout@v3 15 | 16 | - uses: actions/setup-python@v3 17 | with: 18 | python-version: "3.9.x" 19 | 20 | - id: file_changes 21 | uses: trilom/file-changes-action@v1.2.4 22 | with: 23 | output: ' ' 24 | 25 | - name: Install pre-commit 26 | run: python -m pip install "dbt-snowflake<1.4.0" pre-commit 27 | 28 | - name: Run pre-commit model checks 29 | env: 30 | SF_ACCOUNT: ${{ secrets.SF_ACCOUNT }} 31 | SF_USER: ${{ secrets.SF_USER }} 32 | SF_PASSWORD: ${{ secrets.SF_PASSWORD }} 33 | run: | 34 | 
mkdir ~/.dbt/ 35 | ./scripts/create_profile.py > ~/.dbt/profiles.yml 36 | pre-commit run --files ${{ steps.file_changes.outputs.files}} 37 | -------------------------------------------------------------------------------- /.github/workflows/restart_from_failure.yml: -------------------------------------------------------------------------------- 1 | name: Restart from Failure 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | restart: 7 | runs-on: ubuntu-latest 8 | env: 9 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 10 | DBT_CLOUD_ACCOUNT_ID: 43786 11 | JOB_ID: 73796 12 | if: github.actor == 'dpguthrie' 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-python@v2 16 | with: 17 | python-version: "3.9.x" 18 | 19 | - name: Restart Job from Failure 20 | run: | 21 | pip install dbtc 22 | run=$(dbtc trigger-job \ 23 | --job-id=$JOB_ID \ 24 | --payload='{"cause": "Restarting job from failure"}' \ 25 | --no-should-poll \ 26 | --restart-from-failure) 27 | -------------------------------------------------------------------------------- /.github/workflows/run_and_log.yml: -------------------------------------------------------------------------------- 1 | name: Run Job and Send Results to Datadog 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | run_and_log: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | with: 12 | fetch-depth: 0 13 | 14 | - uses: actions/setup-python@v2 15 | with: 16 | python-version: "3.9.x" 17 | 18 | - name: Install dependencies 19 | run: | 20 | pip install dbtc datadog_api_client 21 | 22 | - name: Run Python 23 | run: DBT_CLOUD_JOB_ID=73796 DD_SITE=datadoghq.com python scripts/run_and_log.py 24 | env: 25 | DBT_CLOUD_API_KEY: ${{ secrets.DBT_CLOUD_API_KEY }} 26 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 27 | DBT_CLOUD_ACCOUNT_ID: ${{ secrets.DBT_CLOUD_ACCOUNT_ID }} 28 | DD_API_KEY: ${{ secrets.DD_API_KEY }} 29 | 30 | - uses: actions/checkout@v2 31 | 32 | 
-------------------------------------------------------------------------------- /.github/workflows/run_job_on_merge.yml: -------------------------------------------------------------------------------- 1 | # This action will trigger an already created job on dbt Cloud 2 | # The important settings for the job include: 3 | # - defer - defer this job to itself (will always refer to the most recently modified version of production) 4 | # - commands - ensure you're using the state:modified+ selector to only run what's been modified 5 | # since the last successful run for the job you're deferring to 6 | 7 | name: Run Job on Merge 8 | on: 9 | push: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | run_on_merge: 15 | runs-on: ubuntu-latest 16 | env: 17 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 18 | DBT_CLOUD_ACCOUNT_ID: 43786 19 | JOB_ID: 128558 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: actions/setup-python@v2 24 | with: 25 | python-version: "3.9.x" 26 | 27 | - name: Run Merge Job 28 | run: | 29 | pip install dbtc==0.4.2 30 | run=$(dbtc trigger-job \ 31 | --job-id=$JOB_ID \ 32 | --payload='{"cause": "Merging code to main"}') 33 | echo "RUN_ID=$(echo $run | jq '.data.id')" >> $GITHUB_ENV 34 | 35 | - name: Get manifest 36 | id: manifest 37 | run: | 38 | manifest=$(dbtc get-run-artifact \ 39 | --account-id=$DBT_CLOUD_ACCOUNT_ID \ 40 | --run-id=$RUN_ID \ 41 | --path=manifest.json) 42 | echo $manifest > prod/manifest.json 43 | 44 | - name: Add Manifest to Repo 45 | uses: EndBug/add-and-commit@v9 46 | with: 47 | add: '*.json' 48 | committer_name: Github Actions 49 | committer_email: actions@github.com 50 | message: 'Adding manifest from merge run' 51 | -------------------------------------------------------------------------------- /.github/workflows/tableau.yml: -------------------------------------------------------------------------------- 1 | name: Tableau Exposures 2 | on: 3 | workflow_dispatch: 4 | # schedule: 5 | # - cron: '55 
23 * * *' 6 | 7 | jobs: 8 | run_and_expose: 9 | runs-on: ubuntu-latest 10 | env: 11 | DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} 12 | DBT_CLOUD_ACCOUNT_ID: 43786 13 | DBT_CLOUD_JOB_ID: 93955 14 | TABLEAU_USERNAME: douglas.p.guthrie@gmail.com 15 | TABLEAU_PASSWORD: ${{ secrets.TABLEAU_PASSWORD }} 16 | TABLEAU_SITE: dbtlabsdemo 17 | TABLEAU_URL: https://prod-useast-a.online.tableau.com/ 18 | steps: 19 | - uses: actions/checkout@v2 20 | - uses: actions/setup-python@v2 21 | with: 22 | python-version: "3.9.x" 23 | 24 | - name: Get most recent run 25 | id: recent_run 26 | run: | 27 | pip install dbtc 28 | run=$(dbtc list-runs \ 29 | --job-id=$DBT_CLOUD_JOB_ID \ 30 | --order-by='-id' \ 31 | --limit=1 \ 32 | --status=success) 33 | echo "RUN_ID=$(echo $run | jq '.data[0].id')" >> $GITHUB_ENV 34 | 35 | - name: Get manifest 36 | id: manifest 37 | run: | 38 | manifest=$(dbtc get-run-artifact \ 39 | --account-id=$DBT_CLOUD_ACCOUNT_ID \ 40 | --run-id=$RUN_ID \ 41 | --path=manifest.json) 42 | echo $manifest > manifest.json 43 | 44 | - name: Get exposures 45 | id: exposures 46 | run: | 47 | pip install git+https://github.com/dpguthrie/dbt-exposures-crawler 48 | python -m exposurescrawler.crawlers.tableau \ 49 | --manifest-path=manifest.json \ 50 | --dbt-package-name=tpch \ 51 | --tableau-ignore-projects Archive \ 52 | --verbose \ 53 | --yml-path=models/marts/core/tableau_exposures.yml 54 | 55 | - name: Add file 56 | uses: EndBug/add-and-commit@v9 57 | with: 58 | add: '*.yml' 59 | committer_name: Github Actions 60 | committer_email: actions@github.com 61 | message: 'Add tableau exposures' 62 | 63 | - name: Trigger a job 64 | id: trigger 65 | run: | 66 | run=$(dbtc trigger-job \ 67 | --job-id=$DBT_CLOUD_JOB_ID \ 68 | --payload='{"cause": "Tableau Exposures"}') 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | 
dbt_modules/ 3 | dbt_packages/ 4 | logs/ 5 | .venv/ 6 | .vscode/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/datacoves/pre-commit-dbt 3 | rev: v1.0.0 4 | hooks: 5 | - id: dbt-deps 6 | - id: dbt-docs-generate 7 | - id: check-script-has-no-table-name 8 | - id: check-model-has-all-columns 9 | -------------------------------------------------------------------------------- /.sqlfluff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpguthrie/snowflake-dbt-demo-project/8e626c5bf41dd281a1e52b10641bd6ec8b411001/.sqlfluff -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "justMyCode": true 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Welcome to the dbt Labs demo dbt project! We use the [TPCH dataset](https://docs.snowflake.com/en/user-guide/sample-data-tpch.html) to create a sample project to emulate what a production project might look like! 
2 | 3 | _ __ 4 | ____ ___ ____ _(_)___ ____/ /__ ____ ___ ____ 5 | / __ `__ \/ __ `/ / __ \ / __ / _ \/ __ `__ \/ __ \ 6 | / / / / / / /_/ / / / / / / /_/ / __/ / / / / / /_/ / 7 | /_/ /_/ /_/\__,_/_/_/ /_/ \__,_/\___/_/ /_/ /_/\____/ 8 | 9 | ## Special demos 10 | 11 | - **dbt-external-tables:** Manage database objects that read data external to the warehouse within dbt. See `models/demo_examples/external_sources.yml`. 12 | - **Lifecycle Notifications:** See examples of dbt Cloud Job Lifecycle Notifications [here](https://gist.github.com/boxysean/3166b3ac55801685b6d275e9a9ddd5ee). 13 | - **Pivot tables:** One example of creating a pivot table using Snowflake syntax, another example using Jinja. See `models/aggregates/agg_yearly_*.sql`. 14 | 15 | ## Codegen Examples 16 | 17 | The codegen package can be run via the IDE, by clicking the "Compile" button, or in the command line. 18 | 19 | ### Command Line 20 | 21 | The example below shows how we can generate yml for a particular source: 22 | 23 | ```bash 24 | dbt run-operation generate_source --args '{"schema_name": "tpch_sf001", "database_name": "raw", "generate_columns": "true", "include_descriptions": "true"}' 25 | ``` 26 | 27 | ### IDE 28 | 29 | Paste in the snippets below in your IDE and click "Compile". 
30 | 31 | ### codegen.generate_source 32 | 33 | Generates lightweight YAML for a Source 34 | ```sql 35 | {{ 36 | codegen.generate_source( 37 | schema_name='tpch_sf001', 38 | database_name='raw', 39 | generate_columns='true', 40 | include_descriptions='true', 41 | ) 42 | }} 43 | ``` 44 | 45 | ### codegen.generate_base_model 46 | 47 | Generates SQL for a staging model 48 | ```sql 49 | {{ 50 | codegen.generate_base_model( 51 | source_name='tpch', 52 | table_name='orders', 53 | ) 54 | }} 55 | ``` 56 | 57 | ### codegen.generate_model_yaml 58 | 59 | Generates the YAML for a given model 60 | ```sql 61 | {{ 62 | codegen.generate_model_yaml( 63 | model_name='stg_tpch_orders' 64 | ) 65 | }} 66 | ``` 67 | 68 | Generates the YAML for multiple models 69 | ```sql 70 | {{ 71 | generate_models_yaml( 72 | model_names=[ 73 | 'stg_tpch_orders', 74 | 'stg_tpch_parts', 75 | 'stg_tpch_regions', 76 | ] 77 | ) 78 | }} 79 | ``` 80 | 81 | change -------------------------------------------------------------------------------- /analyses/analysis.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: select_from_orders_example 5 | description: > 6 | This is an example of an analysis model. 7 | dbt's notion of models makes it easy for data teams to version control and collaborate on data transformations. 8 | Sometimes though, a certain sql statement doesn't quite fit into the mold of a dbt model. 9 | These more "analytical" sql files can be versioned inside of your dbt project using the analysis functionality of dbt. 
-------------------------------------------------------------------------------- /analyses/customer_snapshot_query.sql: -------------------------------------------------------------------------------- 1 | with counts as ( 2 | select c_custkey, count(*) as customer_count 3 | from {{ ref('tpch_customer_snapshot') }} 4 | group by 1 5 | having customer_count > 1 6 | ) 7 | 8 | select a.* 9 | from {{ ref('tpch_customer_snapshot') }} a 10 | join counts b on a.c_custkey = b.c_custkey 11 | order by c_custkey, dbt_valid_from 12 | -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | # Assets 2 | 3 | This folder is to be used for files that should be included in our documentation. -------------------------------------------------------------------------------- /assets/tpch_erd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpguthrie/snowflake-dbt-demo-project/8e626c5bf41dd281a1e52b10641bd6ec8b411001/assets/tpch_erd.png -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'tpch' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'tpch' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `source-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 
15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | asset-paths: ["assets"] 22 | 23 | 24 | target-path: "target" # directory which will store compiled SQL files 25 | clean-targets: # directories to be removed by `dbt clean` 26 | - "target" 27 | - "dbt_modules" 28 | - "dbt_packages" 29 | 30 | vars: 31 | start_date: '1999-01-01' 32 | disable_dbt_artifacts_autoupload: "{% if target.name == 'prod' %}false{% else %}true{% endif %}" 33 | disable_run_results: "{% if target.name == 'prod' %}false{% else %}true{% endif %}" 34 | disable_tests_results: "{% if target.name == 'prod' %}false{% else %}true{% endif %}" 35 | disable_dbt_invocation_autoupload: "{% if target.name == 'prod' %}false{% else %}true{% endif %}" 36 | 37 | # Configuring models 38 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 39 | 40 | models: 41 | use_anonymous_sproc: True 42 | 43 | +materialized: table 44 | 45 | tpch: 46 | staging: 47 | +materialized: view 48 | 49 | marts: 50 | core: 51 | +materialized: table 52 | ml: 53 | +enabled: false 54 | 55 | seeds: 56 | tpch: 57 | snowflake_contract_rates: 58 | +column_types: 59 | effective_date: DATE 60 | rate: NUMBER -------------------------------------------------------------------------------- /macros/block_on_tests.sql: -------------------------------------------------------------------------------- 1 | {% macro block_on_tests(list_of_test_macros) %} 2 | 3 | {% if execute %} 4 | {% for test in list_of_test_macros %} 5 | {% set results = run_query(test) %} 6 | {% if results[0][0] == 0 %} 7 | -- TEST {{loop.index}}: PASS 8 | {% else %} 9 | {% set msg %} TEST {{loop.index}}: FAIL {% endset %} 10 | {% do exceptions.raise_compiler_error(msg) %} 11 | {% endif %} 12 | {% endfor %} 13 | {% endif %} 14 | 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- 
/macros/cents_to_dollars.sql: -------------------------------------------------------------------------------- 1 | {# A basic example for a project-wide macro to cast a column uniformly #} 2 | 3 | {% macro cents_to_dollars(column_name, precision=2) -%} 4 | ({{ column_name }} / 100)::numeric(16, {{ precision }}) 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /macros/convert_money.sql: -------------------------------------------------------------------------------- 1 | {% macro money(col) -%} 2 | ::decimal(16,4) 3 | {%- endmacro %} 4 | 5 | 6 | -------------------------------------------------------------------------------- /macros/create_udfs.sql: -------------------------------------------------------------------------------- 1 | {% macro create_udfs() %} 2 | 3 | {% do run_query(create_area_of_circle()) %} 4 | 5 | {% endmacro %} -------------------------------------------------------------------------------- /macros/drop_ci_schemas.sql: -------------------------------------------------------------------------------- 1 | {# drop_ci_schemas 2 | 3 | This macro drops all the schemas within a database beginning with "DBT_CLOUD_PR_". Use the dry_run param to see the schemas that will be dropped before dropping them. 4 | 5 | Args: 6 | - database: string -- the name of the database to clean. By default the target.database is used 7 | - dry_run: bool -- dry run flag. When dry_run is true, the cleanup commands are printed to stdout rather than executed. 
This is true by default 8 | 9 | Example 1 - dry run of current database 10 | dbt run-operation drop_ci_schemas 11 | 12 | Example 2 - actual run of current database 13 | dbt run-operation drop_ci_schemas --args '{dry_run: False}' 14 | 15 | Example 3 - drop CI schemas from a different database 16 | dbt run-operation drop_ci_schemas --args '{database: my_database, dry_run: False}' 17 | 18 | #} 19 | {% macro drop_ci_schemas(database=target.database, dryrun=True) %} 20 | 21 | {% set all_schemas_to_drop = [] %} 22 | 23 | {% set drop_schema_sql %} 24 | 25 | {% set find_schemas_sql %} 26 | SELECT SCHEMA_NAME 27 | FROM {{ database | upper }}.INFORMATION_SCHEMA.SCHEMATA 28 | WHERE SCHEMA_NAME LIKE 'DBT_CLOUD_PR_%'; 29 | {% endset %} 30 | 31 | {% set schemas_to_drop = run_query(find_schemas_sql) %} 32 | 33 | {% for row in schemas_to_drop %} 34 | DROP SCHEMA {{ database | upper }}.{{ row[0] | upper }}; 35 | {% do all_schemas_to_drop.append('{}.{}'.format(database.upper(), row[0].upper())) %} 36 | {% endfor %} 37 | 38 | {% endset %} 39 | 40 | {% if all_schemas_to_drop %} 41 | {% if dryrun %} 42 | {% do log('*NOT* dropping {}'.format(all_schemas_to_drop), info=True) %} 43 | {% else %} 44 | {% do log('Dropping: {} ...'.format(all_schemas_to_drop), info=True) %} 45 | {% do run_query(drop_schema_sql) %} 46 | {% do log('Done.', info=True) %} 47 | {% endif %} 48 | {% else %} 49 | {% do log('No schemas to drop!', info=True) %} 50 | {% endif %} 51 | 52 | {% endmacro %} 53 | -------------------------------------------------------------------------------- /macros/drop_old_nodes.sql: -------------------------------------------------------------------------------- 1 | -- Drop tables that are no longer used 2 | {% macro drop_old_nodes(dryrun=True) %} 3 | 4 | {% set nodes = graph.nodes.values() | list %} 5 | 6 | {% set dbs = nodes | map(attribute='database') | unique %} 7 | 8 | {% set all_tables_to_drop = [] %} 9 | 10 | {% set drop_table_sql %} 11 | 12 | {% for db in dbs %} 13 | 14 | -- 
Database: {{ db }} 15 | 16 | {% set schemas = nodes | selectattr('database', '==', db) | map(attribute='schema') | unique %} 17 | 18 | {% for schema in schemas %} 19 | 20 | -- Schema: {{ schema }} 21 | 22 | {% set model_names = nodes | selectattr('database', '==', db) | selectattr('schema', '==', schema) | map(attribute='name') | map('upper') | join("', '") %} 23 | 24 | {% set find_tables_sql %} 25 | USE DATABASE {{ db }}; 26 | SELECT DISTINCT TABLE_NAME, TABLE_TYPE 27 | FROM INFORMATION_SCHEMA.TABLES 28 | WHERE TABLE_SCHEMA = '{{ schema | upper }}' 29 | AND TABLE_TYPE IN ('VIEW', 'BASE TABLE') 30 | AND TABLE_NAME NOT IN ('{{ model_names }}'); 31 | {% endset %} 32 | {% set tables_to_drop = run_query(find_tables_sql) %} 33 | 34 | {% for row in tables_to_drop %} 35 | DROP {% if row[1] == 'BASE TABLE' %}TABLE{% else %}VIEW{% endif %} {{ db | upper }}.{{ schema | upper }}.{{ row[0] }}; 36 | {% do all_tables_to_drop.append('{}.{}.{}'.format(db.upper(), schema.upper(), row[0].upper())) %} 37 | {% endfor %} 38 | 39 | 40 | {% endfor %} 41 | {% endfor %} 42 | 43 | {% endset %} 44 | 45 | {% if all_tables_to_drop %} 46 | {% if dryrun %} 47 | {% do log('*NOT* dropping {}'.format(all_tables_to_drop), info=True) %} 48 | {% else %} 49 | {% do log('Dropping: {} ...'.format(all_tables_to_drop), info=True) %} 50 | {% do run_query(drop_table_sql) %} 51 | {% do log('Done.', info=True) %} 52 | {% endif %} 53 | {% else %} 54 | {% do log('No tables to drop!', info=True) %} 55 | {% endif %} 56 | {% endmacro %} 57 | -------------------------------------------------------------------------------- /macros/generate_base_models.sql: -------------------------------------------------------------------------------- 1 | {% macro generate_base_models(source_name, exclude=[], include=[], leading_commas=False, case_sensitive_cols=False) %} 2 | 3 | {%- if include | length > 0 and exclude | length > 0 -%} 4 | -- You cannot use both include and exclude arguments 5 | {%- elif execute -%} 6 | 7 | {% set 
sources = graph.sources.values() | selectattr('source_name', '==', source_name) %} 8 | 9 | {%- if exclude | length > 0 -%} 10 | {%- set sources = sources | rejectattr('name', 'in', exclude) -%} 11 | {%- endif -%} 12 | 13 | {%- if include | length > 0 -%} 14 | {%- set sources = sources | selectattr('name', 'in', include) -%} 15 | {%- endif -%} 16 | 17 | {%- for source in sources -%} 18 | 19 | -- ______________________ stg_{{ source_name }}__{{ source.name }}.sql ______________________ 20 | 21 | {{ codegen.generate_base_model(source_name, source.name, leading_commas, case_sensitive_cols) }} 22 | 23 | {%- endfor -%} 24 | 25 | -- ______________________ END ______________________ 26 | 27 | {%- endif -%} 28 | 29 | {% endmacro %} 30 | -------------------------------------------------------------------------------- /macros/generate_models_yaml.sql: -------------------------------------------------------------------------------- 1 | {% macro generate_models_yaml(model_names) %} 2 | 3 | {% set model_yaml=[] %} 4 | 5 | {% do model_yaml.append('version: 2') %} 6 | {% do model_yaml.append('') %} 7 | {% do model_yaml.append('models:') %} 8 | 9 | {% for model_name in model_names %} 10 | 11 | {% do model_yaml.append(' - name: ' ~ model_name | lower) %} 12 | {% do model_yaml.append(' description: ""') %} 13 | {% do model_yaml.append(' columns:') %} 14 | 15 | {% set relation=ref(model_name) %} 16 | {%- set columns = adapter.get_columns_in_relation(relation) -%} 17 | 18 | {% for column in columns %} 19 | {% do model_yaml.append(' - name: ' ~ column.name | lower ) %} 20 | {% do model_yaml.append(' description: ""') %} 21 | {% endfor %} 22 | 23 | {% do model_yaml.append('') %} 24 | 25 | {% endfor %} 26 | 27 | {% if execute %} 28 | 29 | {% set joined = model_yaml | join ('\n') %} 30 | {{ log(joined, info=True) }} 31 | {% do return(joined) %} 32 | 33 | {% endif %} 34 | 35 | {% endmacro %} -------------------------------------------------------------------------------- 
/macros/grant_all_on_schemas.sql: -------------------------------------------------------------------------------- 1 | {% macro grant_all_on_schemas(schemas, role) %} 2 | {% for schema in schemas %} 3 | grant usage on schema {{ schema }} to role {{ role }}; 4 | grant select on all tables in schema {{ schema }} to role {{ role }}; 5 | grant select on all views in schema {{ schema }} to role {{ role }}; 6 | grant select on future tables in schema {{ schema }} to role {{ role }}; 7 | grant select on future views in schema {{ schema }} to role {{ role }}; 8 | {% endfor %} 9 | {% endmacro %} -------------------------------------------------------------------------------- /macros/limit_data_in_dev.sql: -------------------------------------------------------------------------------- 1 | {% macro limit_data_in_dev(filter_column_name, lookback_days=7) %} 2 | 3 | {% if target.name == 'dev' %} 4 | 5 | 6 | where {{ filter_column_name }} >= dateadd('day', -{{ lookback_days }}, current_timestamp) 7 | {% endif %} 8 | 9 | 10 | {% endmacro %} -------------------------------------------------------------------------------- /macros/macro.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | macros: 4 | - name: cents_to_dollars 5 | description: A macro to convert cents to dollars 6 | arguments: 7 | - name: column_name 8 | type: STRING 9 | description: The name of the column you want to convert 10 | - name: precision 11 | type: INTEGER 12 | description: Number of decimal places. Defaults to 2. 13 | 14 | - name: limit_data_in_dev 15 | description: A macro to show conditional logic to limit data when working in dev. 16 | arguments: 17 | - name: filter_column_name 18 | type: DATE 19 | description: The date column used to filter rows to the recent lookback window when running in dev 20 | - name: lookback_days 21 | type: NUMBER 22 | description: Number of lookback days. Defaults to 7.
23 | 24 | - name: money 25 | description: A macro to convert a column to consistent precision 26 | arguments: 27 | - name: col 28 | type: INTEGER 29 | description: column getting converted to decimal(16,4) 30 | 31 | - name: create_udfs 32 | description: An example of how to create UDFs via macros 33 | 34 | - name: grant_all_on_schemas 35 | description: An example of how to grant permissions on schema 36 | arguments: 37 | - name: schemas 38 | type: STRING 39 | description: name of the schema you would like to give the role access to 40 | - name: role 41 | type: STRING 42 | description: user/role within your warehouse you would like to give dbt access to 43 | 44 | - name: unit_test_mode 45 | description: An example of how to create a unit test 46 | 47 | - name: create_area_of_circle 48 | description: Continuation of UDF example 49 | 50 | - name: test_all_values_gte_zero 51 | description: Macro to ensure values are greater than zero -- perfect for data tests 52 | arguments: 53 | - name: table 54 | type: STRING 55 | description: name of the table being tested 56 | - name: column 57 | type: STRING 58 | description: name of the column being tested 59 | 60 | - name: standard_account_fields 61 | description: An example of using a macro to standardize the fields within the analysis 62 | arguments: 63 | - name: return_fields 64 | type: STRING 65 | description: fields you would like returned for analysis 66 | -------------------------------------------------------------------------------- /macros/omit_soft_deletions.sql: -------------------------------------------------------------------------------- 1 | {% macro omit_soft_deletions(table, columns) %} 2 | 3 | select {{ ', '.join(columns) }} 4 | from {{ table }} 5 | where not is_deleted 6 | 7 | {% endmacro %} 8 | 9 | 10 | {# Example Usage 11 | 12 | -- fct_orders.sql 13 | with orders as ( 14 | {{ omit_soft_deletions( 15 | source('tpch', 'orders'), 16 | [ 17 | 'order_key', 18 | 'customer_key', 19 | 'status_code', 20 | 
'total_price', 21 | 'order_date', 22 | 'priority_code', 23 | 'clerk_name', 24 | 'ship_priority', 25 | 'comment' 26 | ] 27 | )}} 28 | ) 29 | 30 | select * from orders 31 | 32 | #} -------------------------------------------------------------------------------- /macros/select_extremes.sql: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Use Case: 4 | - Predictions made at least on a daily cadence for various different objects 5 | - We wanted to see how predictions changed for a particular grain (first -> last) 6 | - This macro allows us to return earliest and latest predictions for multiple objects 7 | by writing this simple code. 8 | 9 | invoice_predictions.sql 10 | {{ select_earliest(source('ds_source', 'invoice_predictions_weekly_all'), ['invoice_id', 'as_of_date']) }} 11 | 12 | bill_predictions.sql 13 | {{ select_latest(source('ds_source', 'invoice_predictions_weekly_all'), ['bill_id', 'as_of_date']) }} 14 | 15 | - We had 8 models that referenced these macros. 
16 | - Reduce amount of code writing, copy/pasting 17 | - If logic does change, we're updating in one place 18 | 19 | */ 20 | 21 | {% macro _select_extremes(fn, table_name, key_columns, timestamp_column='loaded_at') %} 22 | select a.* from {{ table_name }} as a 23 | inner join ( 24 | select {{ ','.join(key_columns) }}, {{ fn }}({{ timestamp_column }}) as last_update 25 | from {{ table_name }} 26 | group by {{ ','.join(key_columns) }} 27 | ) as b on ( 28 | {% for key_column in key_columns %} 29 | a.{{ key_column }} = b.{{ key_column }} 30 | and 31 | {% endfor %} 32 | a.{{ timestamp_column }} = b.last_update 33 | ) 34 | {% endmacro %} 35 | 36 | {% macro select_latest(table_name, key_columns, timestamp_column='loaded_at') %} 37 | {{ _select_extremes('max', table_name, key_columns, timestamp_column) }} 38 | {% endmacro %} 39 | 40 | {% macro select_earliest(table_name, key_columns, timestamp_column='loaded_at') %} 41 | {{ _select_extremes('min', table_name, key_columns, timestamp_column) }} 42 | {% endmacro %} -------------------------------------------------------------------------------- /macros/snowflake/dont_do_this.sql: -------------------------------------------------------------------------------- 1 | {% macro dont_do_this() %} 2 | 3 | {% set sql %} 4 | 5 | select * from {{ ref('dim_customers') }} 6 | limit 10 7 | 8 | {% endset %} 9 | 10 | {% set results = run_query(sql).rows %} 11 | 12 | {% for result in results %} 13 | 14 | {{ log(result, info=True) }} 15 | 16 | {% endfor %} 17 | 18 | {% endmacro %} -------------------------------------------------------------------------------- /macros/snowflake/materializations/incremental_custom.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro dbt_snowflake_validate_get_incremental_strategy(config) %} 3 | {#-- Find and validate the incremental strategy #} 4 | {%- set strategy = config.get("incremental_strategy", default="merge") -%} 5 | 6 | {% set invalid_strategy_msg -%} 
7 | Invalid incremental strategy provided: {{ strategy }} 8 | Expected one of: 'merge', 'delete+insert', 'insert_overwrite' 9 | {%- endset %} 10 | {% if strategy not in ['merge', 'delete+insert', 'insert_overwrite'] %} 11 | {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} 12 | {% endif %} 13 | 14 | {% do return(strategy) %} 15 | {% endmacro %} 16 | 17 | {% macro dbt_snowflake_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key, dest_columns) %} 18 | {% if strategy == 'merge' %} 19 | {% do return(get_merge_sql(target_relation, tmp_relation, unique_key, dest_columns)) %} 20 | {% elif strategy == 'delete+insert' %} 21 | {% do return(get_delete_insert_merge_sql(target_relation, tmp_relation, unique_key, dest_columns)) %} 22 | {% elif strategy == 'insert_overwrite' %} 23 | {% do return(get_insert_overwrite_sql(target_relation, tmp_relation, unique_key, dest_columns)) %} 24 | {% else %} 25 | {% do exceptions.raise_compiler_error('invalid strategy: ' ~ strategy) %} 26 | {% endif %} 27 | {% endmacro %} 28 | 29 | {% macro incremental_validate_delete_target_not_in_source(delete_target_not_in_source, strategy, unique_key, default) %} 30 | 31 | {% if not delete_target_not_in_source %} 32 | {{ return(False) }} 33 | {% elif delete_target_not_in_source and strategy not in ['merge', 'delete+insert'] %} 34 | {% do exceptions.raise_compiler_error('invalid strategy for delete_target_not_in_source, must be one of: [merge, delete+insert]') %} 35 | {% elif delete_target_not_in_source and not unique_key %} 36 | {% do exceptions.raise_compiler_error('invalid configuration, must specify a unique_key to when delete_target_not_in_source is set to True') %} 37 | {% else %} 38 | {{ return(True) }} 39 | {% endif %} 40 | 41 | {% endmacro %} 42 | 43 | {% macro delete_from_target_not_in_source(tmp_relation, target_relation, unique_key) %} 44 | 45 | delete from {{ target_relation }} where {{ unique_key }} not in (select {{ unique_key }} from {{ tmp_relation }} ); 
46 | 47 | {% endmacro %} 48 | 49 | {% materialization incremental_custom, adapter='snowflake' -%} 50 | 51 | {% set original_query_tag = set_query_tag() %} 52 | 53 | {%- set unique_key = config.get('unique_key') -%} 54 | {%- set full_refresh_mode = (should_full_refresh()) -%} 55 | 56 | {% set target_relation = this %} 57 | {% set existing_relation = load_relation(this) %} 58 | {% set tmp_relation = make_temp_relation(this) %} 59 | 60 | {#-- Validate early so we don't run SQL if the strategy is invalid --#} 61 | {% set strategy = dbt_snowflake_validate_get_incremental_strategy(config) -%} 62 | {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} 63 | {% set delete_target_not_in_source = incremental_validate_delete_target_not_in_source( 64 | delete_target_not_in_source = config.get('delete_target_not_in_source'), 65 | strategy=strategy, 66 | unique_key=unique_key, 67 | default=False 68 | ) 69 | %} 70 | 71 | {{ run_hooks(pre_hooks) }} 72 | 73 | {% if existing_relation is none %} 74 | {% set build_sql = create_table_as(False, target_relation, sql) %} 75 | 76 | {% elif existing_relation.is_view %} 77 | {#-- Can't overwrite a view with a table - we must drop --#} 78 | {{ log("Dropping relation " ~ target_relation ~ " because it is a view and this model is a table.") }} 79 | {% do adapter.drop_relation(existing_relation) %} 80 | {% set build_sql = create_table_as(False, target_relation, sql) %} 81 | 82 | {% elif full_refresh_mode %} 83 | {% set build_sql = create_table_as(False, target_relation, sql) %} 84 | 85 | {% else %} 86 | {% do run_query(create_table_as(True, tmp_relation, sql)) %} 87 | {% do adapter.expand_target_column_types( 88 | from_relation=tmp_relation, 89 | to_relation=target_relation) %} 90 | {#-- Process schema changes. Returns dict of changes if successful. 
Use source columns for upserting/merging --#} 91 | {% set dest_columns = process_schema_changes(on_schema_change, tmp_relation, existing_relation) %} 92 | {% if not dest_columns %} 93 | {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %} 94 | {% endif %} 95 | {% set build_sql = dbt_snowflake_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key, dest_columns) %} 96 | {% set delete_sql = delete_from_target_not_in_source(tmp_relation, target_relation, unique_key) %} 97 | {% endif %} 98 | 99 | {%- call statement('main') -%} 100 | {{ build_sql }} 101 | {% if delete_target_not_in_source %} 102 | {{ delete_sql }} 103 | {% endif %} 104 | {%- endcall -%} 105 | 106 | {{ run_hooks(post_hooks) }} 107 | 108 | {% set target_relation = target_relation.incorporate(type='table') %} 109 | {% do persist_docs(target_relation, model) %} 110 | 111 | {% do unset_query_tag(original_query_tag) %} 112 | 113 | {{ return({'relations': [target_relation]}) }} 114 | 115 | {%- endmaterialization %} 116 | -------------------------------------------------------------------------------- /macros/snowflake/share_view.sql: -------------------------------------------------------------------------------- 1 | {% macro share_view() %} 2 | 3 | {# 4 | 5 | Usage: 6 | 7 | This would be used as a post-hook in the config block at the top of a model 8 | 9 | Example: 10 | 11 | {{ config(post_hook='{{ share_view() }}') }} 12 | 13 | #} 14 | 15 | -- Only run in production 16 | {% if target.name == 'prod' %} 17 | 18 | {% set sql %} 19 | -- Create a table with all data to be shared 20 | create or replace table share_db.private.{{ this.name }} as 21 | select * from {{ this }}; 22 | 23 | grant select on share_db.private.{{ this.name }} to role transformer; 24 | 25 | -- Create a secure view which selects based on current account 26 | create or replace secure view share_db.public.{{ this.name }} as 27 | select a.* 28 | from share_db.private.{{ this.name }} as a 29 | inner join 
share_db.private.company_shares as b on ( 30 | a.customer_id = b.customer_id 31 | and b.snowflake_account = current_account() 32 | ); 33 | 34 | grant select on share_db.public.{{ this.name }} to share customer_share; 35 | {% endset %} 36 | 37 | {% set table = run_query(sql) %} 38 | 39 | {% endif %} 40 | 41 | {% endmacro %} -------------------------------------------------------------------------------- /macros/standard_account_fields.sql: -------------------------------------------------------------------------------- 1 | {% macro standard_account_fields() %} 2 | 3 | {# How to use 4 | 5 | select {{ standard_account_fields() }} 6 | from {{ ref('fct_orders') }} 7 | 8 | #} 9 | 10 | {%- set return_fields = ["gross_item_sales_amount", 11 | "item_discount_amount", 12 | "item_tax_amount", 13 | "net_item_sales_amount"] 14 | -%} 15 | 16 | {%- for field in return_fields %} 17 | {{ field }}{% if not loop.last %},{% endif %}{% endfor -%} 18 | 19 | {% endmacro %} -------------------------------------------------------------------------------- /macros/test_all_values_gte_zero.sql: -------------------------------------------------------------------------------- 1 | {% macro test_all_values_gte_zero(table, column) %} 2 | 3 | select * from {{ ref(table) }} where {{ column }} < 0 4 | 5 | {% endmacro %} -------------------------------------------------------------------------------- /macros/udf_area_of_circle.sql: -------------------------------------------------------------------------------- 1 | {% macro create_area_of_circle() %} 2 | 3 | use database {{target.database}}; 4 | 5 | drop function if exists {{target.schema}}.area_of_circle(float); 6 | 7 | create function {{target.schema}}.area_of_circle(radius float) 8 | returns float 9 | as 10 | $$ 11 | pi() * radius * radius 12 | $$ 13 | ; 14 | 15 | {% endmacro %} -------------------------------------------------------------------------------- /macros/unit_test_mode.sql: 
-------------------------------------------------------------------------------- 1 | {% macro unit_test_mode() %} 2 | 3 | {% if var is not defined %} 4 | {{ return(False) }} 5 | 6 | {% elif var('test') == 'true' %} 7 | {{ return(True) }} 8 | 9 | {% else %} 10 | {{ return(False) }} 11 | 12 | {% endif %} 13 | 14 | {% endmacro %} -------------------------------------------------------------------------------- /models/demo_examples/demo_examples.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: materialization_incremental 5 | description: demo to show incremental model 6 | columns: 7 | - name: customer_key 8 | tests: 9 | - unique 10 | - not_null 11 | 12 | - name: use_variables 13 | description: demo to show variables 14 | columns: 15 | - name: order_item_key 16 | tests: 17 | - unique 18 | - not_null 19 | 20 | sources: 21 | - name: test_eqrx_s3 22 | schema: dbt_dguthrie 23 | tables: 24 | - name: dummy_data 25 | description: Testing EQRx setup 26 | external: 27 | location: '@eqrx_test' 28 | file_format: "( type = csv )" 29 | -------------------------------------------------------------------------------- /models/demo_examples/external_sources.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Do you have data in S3? Do you want to access this data within Snowflake from dbt? 3 | # Well you can, with the dbt-external-tables package! 4 | # 5 | # GUIDE 6 | # 7 | # (1) Create a Snowflake external stage, e.g.,: 8 | # 9 | # create or replace stage boerse url='s3://deutsche-boerse-eurex-pds/' file_format = (type=csv) 10 | # 11 | # (2) Add the dbt-external-tables package in packages.yml 12 | # 13 | # (3) Execute `dbt run-operation stage_external_sources` to create the external tables, 14 | # and to refresh them later. (Add this to your dbt Cloud Job.) 
15 | # 16 | # (4) Access the source data from dbt, e.g., 17 | # 18 | # select * from {{ source('deutsche_boerse', 'trading_data') }} 19 | # 20 | # OTHER EXTERNAL TABLES 21 | # 22 | # This package works with Snowpipes, GCS buckets, Redshift Spectrum tables, and more! 23 | # 24 | # MORE INFO 25 | # 26 | # - dbt package: https://github.com/dbt-labs/dbt-external-tables/ 27 | # - external tables: https://docs.snowflake.com/en/user-guide/tables-external-intro.html 28 | # - sample data: https://registry.opendata.aws/deutsche-boerse-pds/ 29 | # 30 | 31 | version: 2 32 | 33 | sources: 34 | - name: deutsche_boerse 35 | description: This is an example of using external sources 36 | database: analytics 37 | schema: boerse 38 | 39 | tables: 40 | - name: trading_data 41 | description: > 42 | The Deutsche Börse Public Data Set consists of trade data aggregated to one minute 43 | intervals from the Eurex and Xetra trading systems. It provides the initial price, 44 | lowest price, highest price, final price and volume for every minute of the trading 45 | day, and for every tradeable security. 
46 | 47 | external: 48 | location: "@boerse" 49 | file_format: "(type=csv field_delimiter=',' skip_header=1)" 50 | auto_refresh: false 51 | 52 | partitions: 53 | - name: source_file_name 54 | data_type: varchar 55 | expression: metadata$filename 56 | 57 | columns: 58 | - name: ISIN 59 | data_type: TEXT 60 | description: "ISIN of the security" 61 | - name: MarketSegment 62 | data_type: TEXT 63 | description: "The product market segment, following the convention on http://www.eurexchange.com" 64 | - name: UnderlyingSymbol 65 | data_type: TEXT 66 | description: "The underlying security" 67 | - name: UnderlyingISIN 68 | data_type: TEXT 69 | description: "ISIN of any underlying security " 70 | - name: Currency 71 | data_type: TEXT 72 | description: "Currency in which the product is traded (ISO 4127)" 73 | - name: SecurityType 74 | data_type: TEXT 75 | description: "Type of instrument" 76 | - name: MaturityDate 77 | data_type: TEXT 78 | description: "Maturity date of the security" 79 | - name: StrikePrice 80 | data_type: NUMBER 81 | description: "Strike price" 82 | - name: PutOrCall 83 | data_type: TEXT 84 | description: "Type of option (string: PUT or CALL)" 85 | - name: MLEG 86 | data_type: TEXT 87 | description: "Identifies multi-leg options" 88 | - name: ContractGenerationNumber 89 | data_type: TEXT 90 | description: "The generation number for options contracts" 91 | - name: SecurityID 92 | data_type: TEXT 93 | description: "Unique identifier for each contract" 94 | - name: Date 95 | data_type: DATE 96 | description: "Date of trading period" 97 | - name: Time 98 | data_type: TEXT 99 | description: "Minute of trading to which this entry relates" 100 | - name: StartPrice 101 | data_type: NUMBER 102 | description: "Trading price at the start of period" 103 | - name: MaxPrice 104 | data_type: NUMBER 105 | description: "Maximum price over the period" 106 | - name: MinPrice 107 | data_type: NUMBER 108 | description: "Minimum price over the period" 109 | - name: EndPrice 
110 | data_type: NUMBER 111 | description: "Trading price at the end of the period" 112 | - name: NumberOfContracts 113 | data_type: INT 114 | description: "Number of contracts traded during the period" 115 | - name: NumberOfTrades 116 | data_type: INT 117 | description: "Number of distinct trades during the period" 118 | 119 | -------------------------------------------------------------------------------- /models/demo_examples/materialization_incremental.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='incremental') }} 2 | 3 | with source as ( 4 | 5 | select * from {{ source('tpch', 'customer') }} 6 | 7 | ), 8 | 9 | 10 | renamed as ( 11 | 12 | select 13 | c_custkey as customer_key, 14 | c_name as name, 15 | c_address as address, 16 | c_nationkey as nation_key, 17 | c_phone as phone_number, 18 | c_acctbal as account_balance, 19 | c_mktsegment as market_segment, 20 | c_comment as comment 21 | 22 | from source 23 | 24 | ) 25 | 26 | select * from renamed 27 | 28 | {% if is_incremental() %} 29 | -- this filter will only be applied on an incremental run 30 | where customer_key not in (select customer_key from {{this}} ) 31 | 32 | {% endif %} -------------------------------------------------------------------------------- /models/demo_examples/use_variables.sql: -------------------------------------------------------------------------------- 1 | 2 | -- This is here to show that data older than start_date exists - run this first 3 | -- select min(order_date) from {{ ref('fct_order_items') }} 4 | 5 | -- start_date is defined in the dbt_project.yml 6 | -- to illustrate overriding variables from the command line, run dbt run -m use_variables --vars '{"start_date": "1996-01-01"}' 7 | select * from {{ ref('fct_order_items') }} where order_date >= '{{ var("start_date") }}' 8 | 9 | 10 | -------------------------------------------------------------------------------- 
/models/marts/aggregates/agg_ship_modes_dynamic_pivot.sql: -------------------------------------------------------------------------------- 1 | /* Create a pivot table with dynamic columns based on the ship modes that are in the system */ 2 | 3 | {%- call statement('result', fetch_result=True) -%} 4 | 5 | {# this pulls the unique ship modes from the fct_order_items table #} 6 | select ship_mode from {{ ref('fct_order_items') }} group by 1 7 | 8 | {%- endcall %} 9 | 10 | {% set ship_modes = load_result('result').table.columns[0].values() %} 11 | 12 | select 13 | date_part('year', order_date) as order_year, 14 | 15 | {# Loop over ship_modes array from above, and sum based on whether the record matches the ship mode #} 16 | {%- for ship_mode in ship_modes -%} 17 | sum(case when ship_mode = '{{ship_mode}}' then gross_item_sales_amount end) as "{{ship_mode|replace(' ', '_')}}_amount" 18 | {%- if not loop.last -%},{% endif %} 19 | {% endfor %} 20 | 21 | from {{ ref('fct_order_items') }} 22 | group by 1 23 | -------------------------------------------------------------------------------- /models/marts/aggregates/agg_ship_modes_hardcoded_pivot.sql: -------------------------------------------------------------------------------- 1 | /* Create a pivot table with hard-coded columns based on a query of the ship modes that are in the system */ 2 | 3 | with merged as ( 4 | select 5 | date_part('year', order_date) as order_year, 6 | ship_mode, 7 | gross_item_sales_amount 8 | from {{ ref('fct_order_items') }} 9 | ) 10 | 11 | select 12 | * 13 | from 14 | merged 15 | -- have to manually map strings in the pivot operation 16 | pivot(sum(gross_item_sales_amount) for ship_mode in ( 17 | 'AIR', 18 | 'REG AIR', 19 | 'FOB', 20 | 'RAIL', 21 | 'MAIL', 22 | 'SHIP', 23 | 'TRUCK' 24 | )) as p 25 | 26 | order by order_year 27 | -------------------------------------------------------------------------------- /models/marts/aggregates/aggregates.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | # These two models are just different ways of doing the same thing (pivot over categories) using jinja and the PIVOT operation in Snowflake 5 | - name: agg_ship_modes_hardcoded_pivot 6 | description: Example of creating a pivot table with hard-coded columns based on a query of the ship modes that are in the system 7 | columns: 8 | - name: order_year 9 | description: year of the order 10 | 11 | - name: agg_ship_modes_dynamic_pivot 12 | description: Example of creating a pivot table with dynamic columns based on the ship modes that are in the system 13 | columns: 14 | - name: order_year 15 | description: year of the order -------------------------------------------------------------------------------- /models/marts/aggregates/exposures.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | exposures: 4 | - name: sales_by_region 5 | description: | 6 | # An h1 header 7 | ============ 8 | 9 | Paragraphs are separated by a blank line. 10 | 11 | 2nd paragraph.
*Italic*, **bold**, and `monospace` 12 | Itemized lists look like: 13 | * this one 14 | * that one 15 | * the other one 16 | 17 | # type could be {dashboard, notebook, analysis, ml, application} 18 | type: dashboard 19 | 20 | # this is just a link to the thing itself for click through from documentation 21 | url: https://10az.online.tableau.com/#/site/dbtlabspartner/views/SalesWorkbook/Dashboard1 22 | 23 | # convenience feature - relative scale of {high, medium, low} 24 | maturity: high 25 | 26 | # documentation purposes for point of contact if stuff breaks 27 | owner: 28 | name: Doug Guthrie 29 | email: doug.guthrie@dbtlabs.com 30 | 31 | # IMPORTANT: determines the lineage relationship of the exposure construct to the rest of your DAG 32 | depends_on: 33 | - ref('fct_orders') -------------------------------------------------------------------------------- /models/marts/core/core.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_customers 5 | description: Customer dimensions table 6 | columns: 7 | - name: customer_key 8 | description: Primary key on the customers table 9 | tests: 10 | - unique 11 | - not_null 12 | meta: 13 | metrics: 14 | total_unique_customers: 15 | type: count 16 | - name: region 17 | description: region name 18 | tests: 19 | - accepted_values: 20 | values: ['AFRICA','MIDDLE EAST','ASIA','EUROPE','AMERICA'] 21 | severity: warn 22 | - name: name 23 | description: customer id 24 | - name: address 25 | description: address of the customer 26 | - name: nation 27 | description: nation name 28 | - name: phone_number 29 | description: phone number of the customer 30 | - name: account_balance 31 | description: '{{ doc("account_balance") }}' 32 | - name: market_segment 33 | description: market segment of the customer 34 | meta: 35 | joins: 36 | - join: stg_tpch_customers 37 | sql_on: ${dim_customers.customer_key} = ${stg_tpch_customers.customer_key} 38 | - join: 
stg_tpch_nations 39 | sql_on: ${stg_tpch_customers.nation_key} = ${stg_tpch_nations.nation_key} 40 | 41 | - name: dim_parts 42 | description: Parts dimensions table 43 | columns: 44 | - name: part_key 45 | description: primary key of the model 46 | tests: 47 | - unique 48 | - not_null 49 | - name: manufacturer 50 | description: manufacturer of the part 51 | - name: name 52 | description: name of the part 53 | - name: brand 54 | description: brand of the part 55 | - name: type 56 | description: type of part including material 57 | - name: size 58 | description: size of the part 59 | - name: container 60 | description: container of the part 61 | - name: retail_price 62 | description: '{{ doc("retail_price") }}' 63 | 64 | - name: dim_suppliers 65 | description: Suppliers dimensions table 66 | columns: 67 | - name: supplier_key 68 | description: primary key of the model 69 | tests: 70 | - unique 71 | - not_null 72 | - name: supplier_name 73 | description: '{{ doc("supplier_name") }}' 74 | - name: supplier_address 75 | description: '{{ doc("supplier_address") }}' 76 | - name: nation 77 | description: nation name 78 | - name: region 79 | description: region name 80 | - name: phone_number 81 | description: '{{ doc("phone_number") }}' 82 | - name: account_balance 83 | description: '{{ doc("account_balance") }}' 84 | meta: 85 | joins: 86 | - join: stg_tpch_suppliers 87 | sql_on: ${dim_suppliers.supplier_key} = ${stg_tpch_suppliers.supplier_key} 88 | 89 | - name: fct_order_items 90 | description: order items fact table 91 | columns: 92 | - name: order_item_key 93 | description: '{{ doc("order_item_key") }}' 94 | tests: 95 | - unique 96 | - not_null 97 | - name: order_key 98 | description: foreign key for orders 99 | meta: 100 | metrics: 101 | items_in_order: 102 | type: count_distinct 103 | - name: order_date 104 | description: date of the order 105 | - name: customer_key 106 | description: foreign key for customers 107 | - name: part_key 108 | description: foreign key for 
part 109 | - name: supplier_key 110 | description: foreign key for suppliers 111 | - name: order_item_status_code 112 | description: status of the order item 113 | - name: return_flag 114 | description: '{{ doc("return_flag") }}' 115 | - name: line_number 116 | description: '{{ doc("line_number") }}' 117 | - name: ship_date 118 | description: '{{ doc("ship_date") }}' 119 | - name: commit_date 120 | description: '{{ doc("commit_date") }}' 121 | - name: receipt_date 122 | description: '{{ doc("receipt_date") }}' 123 | - name: ship_mode 124 | description: '{{ doc("ship_mode") }}' 125 | - name: supplier_cost 126 | description: '{{ doc("cost") }}' 127 | - name: base_price 128 | description: '{{ doc("base_price") }}' 129 | - name: discount_percentage 130 | description: '{{ doc("discount_percentage") }}' 131 | - name: discounted_price 132 | description: '{{ doc("discounted_price") }}' 133 | - name: tax_rate 134 | description: '{{ doc("tax_rate") }}' 135 | - name: order_item_count 136 | description: count of order items 137 | - name: quantity 138 | description: total units 139 | - name: gross_item_sales_amount 140 | description: '{{ doc("gross_item_sales_amount") }}' 141 | meta: 142 | metrics: 143 | total_revenue: 144 | type: sum 145 | show_underlying_values: 146 | - return_flag 147 | - order_item_status_code 148 | - supplier_key 149 | - ship_mode 150 | - name: discounted_item_sales_amount 151 | description: '{{ doc("discounted_item_sales_amount") }}' 152 | - name: item_discount_amount 153 | description: '{{ doc("item_discount_amount") }}' 154 | - name: item_tax_amount 155 | description: '{{ doc("item_tax_amount") }}' 156 | - name: net_item_sales_amount 157 | description: '{{ doc("net_item_sales_amount") }}' 158 | meta: 159 | joins: 160 | - join: dim_customers 161 | sql_on: ${fct_order_items.customer_key} = ${dim_customers.customer_key} 162 | - join: stg_tpch_suppliers 163 | sql_on: ${fct_order_items.supplier_key} = ${stg_tpch_suppliers.supplier_key} 164 | 165 | - name: 
fct_orders 166 | description: orders fact table 167 | columns: 168 | - name: order_key 169 | description: primary key of the model 170 | tests: 171 | - unique 172 | - not_null 173 | meta: 174 | metrics: 175 | total_orders: 176 | type: sum 177 | - name: customer_key 178 | description: foreign key for customers 179 | tests: 180 | - relationships: 181 | to: ref('dim_customers') 182 | field: customer_key 183 | severity: error 184 | meta: 185 | metrics: 186 | total_unique_customers: 187 | type: count_distinct 188 | - name: order_date 189 | description: date of the order 190 | - name: status_code 191 | description: status of the order 192 | - name: priority_code 193 | description: code associated with the order 194 | - name: clerk_name 195 | description: id of the clerk 196 | - name: ship_priority 197 | description: numeric representation of the shipping priority, zero being the default 198 | - name: order_count 199 | description: count of order 200 | - name: gross_item_sales_amount 201 | description: '{{ doc("gross_item_sales_amount") }}' 202 | - name: item_discount_amount 203 | description: '{{ doc("item_discount_amount") }}' 204 | - name: item_tax_amount 205 | description: '{{ doc("item_tax_amount") }}' 206 | - name: net_item_sales_amount 207 | description: '{{ doc("net_item_sales_amount") }}' 208 | meta: 209 | joins: 210 | - join: dim_customers 211 | sql_on: ${fct_orders.customer_key} = ${dim_customers.customer_key} 212 | -------------------------------------------------------------------------------- /models/marts/core/dim_customers.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'table', 4 | transient=false 5 | ) 6 | }} 7 | 8 | with customer as ( 9 | 10 | select * from {{ ref('stg_tpch_customers') }} 11 | 12 | ), 13 | nation as ( 14 | 15 | select * from {{ ref('stg_tpch_nations') }} 16 | ), 17 | region as ( 18 | 19 | select * from {{ ref('stg_tpch_regions') }} 20 | 21 | ), 22 | final as ( 
23 | select 24 | customer.customer_key, 25 | customer.name, 26 | customer.address, 27 | nation.nation_key, 28 | nation.name as nation, 29 | region.region_key, 30 | region.name as region, 31 | customer.phone_number, 32 | customer.account_balance, 33 | customer.market_segment 34 | from 35 | customer 36 | inner join nation 37 | on customer.nation_key = nation.nation_key 38 | inner join region 39 | on nation.region_key = region.region_key 40 | ) 41 | select 42 | * 43 | from 44 | final 45 | order by 46 | customer_key 47 | -------------------------------------------------------------------------------- /models/marts/core/dim_parts.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'table' 4 | ) 5 | }} 6 | with part as ( 7 | 8 | select * from {{ref('stg_tpch_parts')}} 9 | 10 | ), 11 | 12 | final as ( 13 | select 14 | part_key, 15 | manufacturer, 16 | name, 17 | brand, 18 | type, 19 | size, 20 | container, 21 | retail_price 22 | from 23 | part 24 | ) 25 | select * 26 | from final 27 | order by part_key -------------------------------------------------------------------------------- /models/marts/core/dim_suppliers.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'table' 4 | ) 5 | }} 6 | 7 | with supplier as ( 8 | 9 | select * from {{ ref('stg_tpch_suppliers') }} 10 | 11 | ), 12 | nation as ( 13 | 14 | select * from {{ ref('stg_tpch_nations') }} 15 | ), 16 | region as ( 17 | 18 | select * from {{ ref('stg_tpch_regions') }} 19 | 20 | ), 21 | final as ( 22 | 23 | select 24 | supplier.supplier_key, 25 | supplier.supplier_name, 26 | supplier.supplier_address, 27 | nation.name as nation, 28 | region.name as region, 29 | supplier.phone_number, 30 | supplier.account_balance 31 | from 32 | supplier 33 | inner join nation 34 | on supplier.nation_key = nation.nation_key 35 | inner join region 36 | on nation.region_key = region.region_key 
37 | ) 38 | 39 | select * from final 40 | -------------------------------------------------------------------------------- /models/marts/core/fct_order_items.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'table', 4 | tags = ['finance'] 5 | ) 6 | }} 7 | 8 | with order_item as ( 9 | 10 | select * from {{ ref('order_items') }} 11 | 12 | ), 13 | 14 | part_supplier as ( 15 | 16 | select * from {{ ref('part_suppliers') }} 17 | 18 | ), 19 | 20 | final as ( 21 | select 22 | order_item.order_item_key, 23 | order_item.order_key, 24 | order_item.order_date, 25 | order_item.customer_key, 26 | order_item.part_key, 27 | order_item.supplier_key, 28 | order_item.order_item_status_code, 29 | order_item.return_flag, 30 | order_item.line_number, 31 | order_item.ship_date, 32 | order_item.commit_date, 33 | order_item.receipt_date, 34 | order_item.ship_mode, 35 | part_supplier.cost as supplier_cost, 36 | {# ps.retail_price, #} 37 | order_item.base_price, 38 | order_item.discount_percentage, 39 | order_item.discounted_price, 40 | order_item.tax_rate, 41 | 42 | 1 as order_item_count, 43 | order_item.quantity, 44 | order_item.gross_item_sales_amount, 45 | order_item.discounted_item_sales_amount, 46 | order_item.item_discount_amount, 47 | order_item.item_tax_amount, 48 | order_item.net_item_sales_amount 49 | 50 | from 51 | order_item 52 | inner join part_supplier 53 | on order_item.part_key = part_supplier.part_key and 54 | order_item.supplier_key = part_supplier.supplier_key 55 | ) 56 | select 57 | * 58 | from 59 | final 60 | order by 61 | order_date 62 | -------------------------------------------------------------------------------- /models/marts/core/fct_orders.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'table', 4 | tags=['finance'] 5 | ) 6 | }} 7 | 8 | 9 | with orders as ( 10 | 11 | select * from {{ ref('stg_tpch_orders') }} 12 
| 13 | ), 14 | order_item as ( 15 | 16 | select * from {{ ref('order_items') }} 17 | 18 | ), 19 | order_item_summary as ( 20 | 21 | select 22 | order_key, 23 | sum(gross_item_sales_amount) as gross_item_sales_amount, 24 | sum(item_discount_amount) as item_discount_amount, 25 | sum(item_tax_amount) as item_tax_amount, 26 | sum(net_item_sales_amount) as net_item_sales_amount 27 | from order_item 28 | group by 29 | 1 30 | ), 31 | final as ( 32 | 33 | select 34 | 35 | orders.order_key, 36 | orders.order_date, 37 | orders.customer_key, 38 | orders.status_code, 39 | orders.priority_code, 40 | orders.ship_priority, 41 | orders.clerk_name, 42 | 1 as order_count, 43 | order_item_summary.gross_item_sales_amount, 44 | order_item_summary.item_discount_amount, 45 | order_item_summary.item_tax_amount, 46 | order_item_summary.net_item_sales_amount 47 | from 48 | orders 49 | inner join order_item_summary 50 | on orders.order_key = order_item_summary.order_key 51 | ) 52 | select 53 | * 54 | from 55 | final 56 | 57 | order by 58 | order_date 59 | -------------------------------------------------------------------------------- /models/marts/core/fct_orders_stats_py.py: -------------------------------------------------------------------------------- 1 | def model(dbt, session): 2 | 3 | # Access to config block 4 | dbt.config( 5 | materialized='table', 6 | snowflake_warehouse='SNOWPARK_WH', 7 | enabled=False, 8 | ) 9 | 10 | # Get upstream data 11 | df = dbt.ref('fct_orders') 12 | 13 | # Describe the data 14 | df = df.describe() 15 | 16 | return df 17 | -------------------------------------------------------------------------------- /models/marts/core/fct_orders_stats_sql.sql: -------------------------------------------------------------------------------- 1 | {% set ref_orders = ref('fct_orders') %} 2 | 3 | with 4 | 5 | orders as ( 6 | 7 | select * from {{ ref_orders }} 8 | 9 | ), 10 | 11 | described as ( 12 | 13 | {% set columns = adapter.get_columns_in_relation(ref_orders) %} 14 | {% 
set numeric_cols = [] %} 15 | {% for col in columns %} 16 | {% if col.dtype in ('NUMBER', 'FLOAT') %} 17 | {% do numeric_cols.append(col) %} 18 | {% endif %} 19 | {% endfor %} 20 | 21 | {% set stats = { 22 | 'stddev': 'stddev(...)', 23 | 'min': 'min(...)', 24 | 'mean': 'avg(...)', 25 | 'count': 'count(...)', 26 | 'max': 'max(...)', 27 | } %} 28 | 29 | {% for stat_name, stat_calc in stats.items() %} 30 | 31 | select 32 | '{{ stat_name }}' as metric, 33 | {% for col in numeric_cols %} 34 | {{ stat_calc | replace('...', col.name) }} as {{ col.name }}{{ ',' if not loop.last }} 35 | {% endfor %} 36 | 37 | from {{ ref_orders }} 38 | 39 | {{ 'union all' if not loop.last }} 40 | 41 | {% endfor %} 42 | 43 | ) 44 | 45 | select * from described 46 | -------------------------------------------------------------------------------- /models/marts/intermediate/intermediate.md: -------------------------------------------------------------------------------- 1 | # the intent of this .md is to remove redundancy in the documentation 2 | 3 | # the below are descriptions from order_items 4 | {% docs base_price %} since extended_price is the line item total, we back out the price per item {% enddocs %} 5 | 6 | {% docs discounted_price %} factoring in the discount_percentage, the line item discount total {% enddocs %} 7 | 8 | {% docs tax_rate %} tax rate of the order item {% enddocs %} 9 | 10 | {% docs gross_item_sales_amount %} same as extended_price {% enddocs %} 11 | 12 | {% docs discounted_item_sales_amount %} line item (includes quantity) discount amount{% enddocs %} 13 | 14 | {% docs item_discount_amount %} item level discount amount. 
this is always a negative number {% enddocs %} 15 | 16 | {% docs item_tax_amount %} item level tax total {% enddocs %} 17 | 18 | {% docs net_item_sales_amount %} the net total which factors in discount and tax {% enddocs %} 19 | -------------------------------------------------------------------------------- /models/marts/intermediate/intermediate.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: order_items 5 | description: Intermediate model where we calculate item price, discounts and tax. This model is at the order item level. 6 | tags: 7 | - output_table_type|orders 8 | columns: 9 | - name: order_item_key 10 | description: '{{ doc("order_item_key") }}' 11 | tests: 12 | - unique 13 | - not_null 14 | - name: order_key 15 | description: foreign key for orders 16 | - name: customer_key 17 | description: foreign key for customers 18 | - name: part_key 19 | description: foreign key for part 20 | - name: supplier_key 21 | description: foreign key for suppliers 22 | - name: order_date 23 | description: date of the order 24 | - name: order_status_code 25 | description: status of the order 26 | - name: return_flag 27 | description: '{{ doc("return_flag") }}' 28 | - name: line_number 29 | description: '{{ doc("line_number") }}' 30 | - name: order_item_status_code 31 | description: status of the order item 32 | - name: ship_date 33 | description: '{{ doc("ship_date") }}' 34 | - name: commit_date 35 | description: '{{ doc("commit_date") }}' 36 | - name: receipt_date 37 | description: '{{ doc("receipt_date") }}' 38 | - name: ship_mode 39 | description: '{{ doc("ship_mode") }}' 40 | - name: extended_price 41 | description: '{{ doc("extended_price") }}' 42 | - name: quantity 43 | description: total units 44 | - name: base_price 45 | description: '{{ doc("base_price") }}' 46 | - name: discount_percentage 47 | description: '{{ doc("discount_percentage") }}' 48 | - name: discounted_price 49 | 
description: '{{ doc("discounted_price") }}' 50 | - name: gross_item_sales_amount 51 | description: '{{ doc("gross_item_sales_amount") }}' 52 | - name: discounted_item_sales_amount 53 | description: '{{ doc("discounted_item_sales_amount") }}' 54 | - name: item_discount_amount 55 | description: '{{ doc("item_discount_amount") }}' 56 | - name: tax_rate 57 | description: '{{ doc("tax_rate") }}' 58 | - name: item_tax_amount 59 | description: item level tax total 60 | - name: net_item_sales_amount 61 | description: '{{ doc("net_item_sales_amount") }}' 62 | 63 | - name: part_suppliers 64 | description: Intermediate model where we join part, supplier and part_supplier. This model is at the part supplier level. 65 | columns: 66 | - name: part_supplier_key 67 | description: primary key of the models 68 | tests: 69 | - unique 70 | - not_null 71 | - name: part_key 72 | description: foreign key for part 73 | - name: part_name 74 | description: name of the part 75 | - name: manufacturer 76 | description: manufacturer of the part 77 | - name: brand 78 | description: brand of the part 79 | - name: part_type 80 | description: type of part including material 81 | - name: part_size 82 | description: size of the part 83 | - name: container 84 | description: container of the part 85 | - name: retail_price 86 | description: '{{ doc("retail_price") }}' 87 | - name: supplier_key 88 | description: foreign key for supplier 89 | - name: supplier_name 90 | description: '{{ doc("supplier_name") }}' 91 | - name: supplier_address 92 | description: '{{ doc("supplier_address") }}' 93 | - name: phone_number 94 | description: '{{ doc("phone_number") }}' 95 | - name: account_balance 96 | description: '{{ doc("account_balance") }}' 97 | - name: nation_key 98 | description: foreign key for nation 99 | - name: available_quantity 100 | description: '{{ doc("available_quantity") }}' 101 | - name: cost 102 | description: '{{ doc("cost") }}' 
-------------------------------------------------------------------------------- /models/marts/intermediate/order_items.sql: -------------------------------------------------------------------------------- 1 | 2 | with orders as ( 3 | 4 | select * from {{ ref('stg_tpch_orders') }} 5 | 6 | ), 7 | 8 | line_item as ( 9 | 10 | select * from {{ ref('stg_tpch_line_items') }} 11 | 12 | ) 13 | select 14 | 15 | line_item.order_item_key, 16 | orders.order_key, 17 | orders.customer_key, 18 | line_item.part_key, 19 | line_item.supplier_key, 20 | orders.order_date, 21 | orders.status_code as order_status_code, 22 | 23 | 24 | line_item.return_flag, 25 | 26 | line_item.line_number, 27 | line_item.status_code as order_item_status_code, 28 | line_item.ship_date, 29 | line_item.commit_date, 30 | line_item.receipt_date, 31 | line_item.ship_mode, 32 | line_item.extended_price, 33 | line_item.quantity, 34 | 35 | -- extended_price is actually the line item total, 36 | -- so we back out the extended price per item 37 | (line_item.extended_price/nullif(line_item.quantity, 0)){{ money() }} as base_price, 38 | line_item.discount_percentage, 39 | (base_price * (1 - line_item.discount_percentage)){{ money() }} as discounted_price, 40 | 41 | line_item.extended_price as gross_item_sales_amount, 42 | (line_item.extended_price * (1 - line_item.discount_percentage)){{ money() }} as discounted_item_sales_amount, 43 | -- We model discounts as negative amounts 44 | (-1 * line_item.extended_price * line_item.discount_percentage){{ money() }} as item_discount_amount, 45 | line_item.tax_rate, 46 | ((gross_item_sales_amount + item_discount_amount) * line_item.tax_rate){{ money() }} as item_tax_amount, 47 | ( 48 | gross_item_sales_amount + 49 | item_discount_amount + 50 | item_tax_amount 51 | ){{ money() }} as net_item_sales_amount 52 | 53 | from 54 | orders 55 | inner join line_item 56 | on orders.order_key = line_item.order_key 57 | order by 58 | orders.order_date 
-------------------------------------------------------------------------------- /models/marts/intermediate/part_suppliers.sql: -------------------------------------------------------------------------------- 1 | with part as ( 2 | 3 | select * from {{ ref('stg_tpch_parts') }} 4 | 5 | ), 6 | 7 | supplier as ( 8 | 9 | select * from {{ ref('stg_tpch_suppliers') }} 10 | 11 | ), 12 | 13 | part_supplier as ( 14 | 15 | select * from {{ ref('stg_tpch_part_suppliers') }} 16 | 17 | ), 18 | 19 | final as ( 20 | select 21 | 22 | part_supplier.part_supplier_key, 23 | part.part_key, 24 | part.name as part_name, 25 | part.manufacturer, 26 | part.brand, 27 | part.type as part_type, 28 | part.size as part_size, 29 | part.container, 30 | part.retail_price, 31 | 32 | supplier.supplier_key, 33 | supplier.supplier_name, 34 | supplier.supplier_address, 35 | supplier.phone_number, 36 | supplier.account_balance, 37 | supplier.nation_key, 38 | 39 | part_supplier.available_quantity, 40 | part_supplier.cost 41 | from 42 | part 43 | inner join 44 | part_supplier 45 | on part.part_key = part_supplier.part_key 46 | inner join 47 | supplier 48 | on part_supplier.supplier_key = supplier.supplier_key 49 | order by 50 | part.part_key 51 | ) 52 | 53 | select * from final 54 | -------------------------------------------------------------------------------- /models/marts/marketing/_models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: int_segment__pages 5 | columns: 6 | - name: anonymous_id 7 | description: "A pseudo-unique substitute for a User ID, for cases when you don’t have an absolutely unique identifier. A userId or an anonymousId is required. See the Identities docs for more details." 
8 | meta: 9 | dimension: 10 | type: string 11 | metrics: 12 | number_of_unique_users: 13 | type: sum 14 | - name: context_locale 15 | description: "" 16 | meta: 17 | dimension: 18 | type: string 19 | - name: context_page_referrer 20 | description: "" 21 | meta: 22 | dimension: 23 | type: string 24 | - name: url 25 | description: "Page’s full URL. Segment first looks for the canonical URL. If the canonical URL is not provided, Segment uses `location.href` from the DOM API." 26 | meta: 27 | dimension: 28 | type: string 29 | - name: uuid_ts 30 | description: "" 31 | meta: 32 | dimension: 33 | type: timestamp 34 | - name: context_library_name 35 | description: "" 36 | meta: 37 | dimension: 38 | type: string 39 | - name: context_library_version 40 | description: "" 41 | meta: 42 | dimension: 43 | type: string 44 | - name: id 45 | description: "" 46 | meta: 47 | dimension: 48 | type: string 49 | metrics: 50 | total_page_views: 51 | type: count_distinct 52 | - name: received_at 53 | description: "Automatically set by Segment, the timestamp of when a message is received by Segment It is an ISO-8601 date string. See the Timestamps fields docs for more detail." 54 | meta: 55 | dimension: 56 | type: timestamp 57 | - name: title 58 | description: "Page’s title. Equivalent to `document.title` from the DOM API." 59 | meta: 60 | dimension: 61 | type: string 62 | - name: context_page_title 63 | description: "" 64 | meta: 65 | dimension: 66 | type: string 67 | - name: path 68 | description: "Path portion of the page's URL. Equivalent to `canonical_path` which defaults to `location.pathname` from the DOM API." 69 | meta: 70 | dimension: 71 | type: string 72 | - name: referrer 73 | description: "Previous page’s full URL. Equivalent to `document.referrer` from the DOM API." 
74 | meta: 75 | dimension: 76 | type: string 77 | - name: timestamp 78 | description: "Timestamp when the message itself took place, defaulted to the current time by the Segment Tracking API, as a ISO-8601 format date string. If the event just happened, leave it out and we’ll use the server’s time. If you’re importing data from the past, make sure you to provide a timestamp.See the Timestamps fields docs for more detail." 79 | meta: 80 | dimension: 81 | type: timestamp 82 | - name: original_timestamp 83 | description: "" 84 | meta: 85 | dimension: 86 | type: timestamp 87 | - name: sent_at 88 | description: "Timestamp of when a message is sent to Segment, used for clock skew correction It is set automatically by the Segment tracking libraries. It is an ISO-8601 date string. See the Timestamps fields docs for more detail." 89 | meta: 90 | dimension: 91 | type: timestamp 92 | - name: context_ip 93 | description: "" 94 | meta: 95 | dimension: 96 | type: string 97 | - name: context_page_path 98 | description: "" 99 | meta: 100 | dimension: 101 | type: string 102 | - name: context_page_url 103 | description: "" 104 | meta: 105 | dimension: 106 | type: string 107 | - name: context_user_agent 108 | description: "" 109 | meta: 110 | dimension: 111 | type: string 112 | - name: src 113 | description: "The application source" 114 | meta: 115 | dimension: 116 | type: string 117 | - name: device 118 | description: "Type of device accessing the page" 119 | meta: 120 | dimension: 121 | type: string 122 | - name: device_category 123 | description: "Category of device accessing the page" 124 | meta: 125 | dimension: 126 | type: string 127 | - name: page_url_host 128 | description: "Page URL Host" 129 | meta: 130 | dimension: 131 | type: string 132 | - name: referrer_host 133 | description: "Host of the referrer" 134 | meta: 135 | dimension: 136 | type: string 137 | 138 | - name: int_segment__tracks 139 | columns: 140 | - name: event_text 141 | description: "" 142 | meta: 143 | 
dimension: 144 | type: string 145 | - name: context_library_version 146 | description: "" 147 | meta: 148 | dimension: 149 | type: string 150 | - name: context_page_referrer 151 | description: "" 152 | meta: 153 | dimension: 154 | type: string 155 | - name: context_page_url 156 | description: "" 157 | meta: 158 | dimension: 159 | type: string 160 | - name: event 161 | description: "" 162 | meta: 163 | dimension: 164 | type: string 165 | - name: context_ip 166 | description: "" 167 | meta: 168 | dimension: 169 | type: string 170 | - name: context_page_path 171 | description: "" 172 | meta: 173 | dimension: 174 | type: string 175 | - name: context_page_title 176 | description: "" 177 | meta: 178 | dimension: 179 | type: string 180 | - name: received_at 181 | description: "" 182 | meta: 183 | dimension: 184 | type: timestamp 185 | - name: sent_at 186 | description: "" 187 | meta: 188 | dimension: 189 | type: timestamp 190 | - name: timestamp 191 | description: "" 192 | meta: 193 | dimension: 194 | type: timestamp 195 | - name: uuid_ts 196 | description: "" 197 | meta: 198 | dimension: 199 | type: timestamp 200 | - name: context_locale 201 | description: "Locale string for the current user, for example en-US." 202 | meta: 203 | dimension: 204 | type: string 205 | - name: context_user_agent 206 | description: "User agent of the device making the request." 
207 | meta: 208 | dimension: 209 | type: string 210 | - name: id g 211 | description: "" 212 | meta: 213 | dimension: 214 | type: string 215 | - name: original_timestamp 216 | description: "" 217 | meta: 218 | dimension: 219 | type: timestamp 220 | - name: anonymous_id 221 | description: "" 222 | meta: 223 | dimension: 224 | type: string 225 | - name: context_library_name 226 | description: "" 227 | meta: 228 | dimension: 229 | type: string 230 | - name: src 231 | description: "" 232 | meta: 233 | dimension: 234 | type: string 235 | 236 | - name: int_segment__link_clicked 237 | -------------------------------------------------------------------------------- /models/marts/marketing/int_segment__link_clicked.sql: -------------------------------------------------------------------------------- 1 | {% set sources = ['dbtc', 'yahooquery'] %} 2 | 3 | with 4 | 5 | {% for source in sources %} 6 | 7 | {{ source }}_source as ( 8 | select 9 | id, 10 | original_timestamp, 11 | received_at, 12 | timestamp, 13 | anonymous_id, 14 | context_ip, 15 | context_user_agent, 16 | link, 17 | context_library_version, 18 | event, 19 | event_text, 20 | uuid_ts, 21 | context_page_title, 22 | context_page_url, 23 | sent_at, 24 | context_library_name, 25 | context_locale, 26 | context_page_path, 27 | context_page_referrer, 28 | '{{ source }}' as src 29 | 30 | from {{ ref('stg_' ~ source ~ '__link_clicked') }} 31 | ), 32 | 33 | {% endfor %} 34 | 35 | unioned_sources as ( 36 | {% for source in sources %} 37 | select * from {{ source }}_source 38 | {% if not loop.last %}union all{% endif %} 39 | {% endfor %} 40 | ) 41 | 42 | select * from unioned_sources 43 | -------------------------------------------------------------------------------- /models/marts/marketing/int_segment__pages.sql: -------------------------------------------------------------------------------- 1 | {% set sources = ['dbtc', 'yahooquery'] %} 2 | 3 | with 4 | 5 | {% for source in sources %} 6 | 7 | {{ source }}_source as ( 8 | 
select 9 | anonymous_id, 10 | context_locale, 11 | context_page_referrer, 12 | url, 13 | uuid_ts, 14 | context_library_name, 15 | context_library_version, 16 | id, 17 | received_at, 18 | title, 19 | context_page_title, 20 | path, 21 | referrer, 22 | timestamp, 23 | original_timestamp, 24 | sent_at, 25 | context_ip, 26 | context_page_path, 27 | context_page_url, 28 | context_user_agent, 29 | page_url_host, 30 | referrer_host, 31 | gclid, 32 | device, 33 | device_category, 34 | '{{ source }}' as src 35 | 36 | from {{ ref('stg_' ~ source ~ '__pages') }} 37 | where url not like 'http://127.0.0.1:8000%' 38 | and url not like 'http://localhost:8000%' 39 | ), 40 | 41 | {% endfor %} 42 | 43 | unioned_sources as ( 44 | {% for source in sources %} 45 | select * from {{ source }}_source 46 | {% if not loop.last %}union all{% endif %} 47 | {% endfor %} 48 | ) 49 | 50 | select * from unioned_sources 51 | -------------------------------------------------------------------------------- /models/marts/marketing/int_segment__tracks.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% set sources = ['dbtc', 'yahooquery'] -%} 4 | 5 | with 6 | 7 | {% for source in sources -%} 8 | 9 | {{ source }}_source as ( 10 | select 11 | event_text, 12 | context_library_version, 13 | context_page_referrer, 14 | context_page_url, 15 | event, 16 | context_ip, 17 | context_page_path, 18 | context_page_title, 19 | received_at, 20 | sent_at, 21 | timestamp, 22 | uuid_ts, 23 | context_locale, 24 | context_user_agent, 25 | id, 26 | original_timestamp, 27 | anonymous_id, 28 | context_library_name, 29 | '{{ source }}' as src 30 | 31 | from {{ ref('stg_' ~ source ~ '__tracks') }} 32 | ), 33 | 34 | {% endfor -%} 35 | 36 | unioned_sources as ( 37 | {% for source in sources -%} 38 | select * from {{ source }}_source 39 | {% if not loop.last %}union all{% endif %} 40 | {% endfor -%} 41 | ) 42 | 43 | select * from unioned_sources 44 | 
-------------------------------------------------------------------------------- /models/metrics/revenue_weekly_by_ship_mode.sql: -------------------------------------------------------------------------------- 1 | select 2 | * 3 | from {{ metrics.calculate( 4 | metric('total_revenue'), 5 | grain='week', 6 | dimensions=['ship_mode'] 7 | ) }} -------------------------------------------------------------------------------- /models/metrics/tpch_metrics.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | metrics: 4 | - &finance-metric 5 | name: total_revenue 6 | label: Total Revenue 7 | model: ref('fct_order_items') 8 | description: Total income from all orders 9 | 10 | calculation_method: sum 11 | expression: gross_item_sales_amount 12 | 13 | timestamp: order_date 14 | time_grains: [day, week, month, quarter, year] 15 | 16 | dimensions: 17 | - return_flag 18 | - order_item_status_code 19 | - supplier_key 20 | - ship_mode 21 | 22 | - <<: *finance-metric 23 | name: total_customers 24 | label: Total Customers 25 | model: ref('fct_order_items') 26 | description: Total customers with an order 27 | calculation_method: count_distinct 28 | expression: customer_key 29 | 30 | - <<: *finance-metric 31 | name: total_expenses 32 | label: Total Expenses 33 | model: ref('fct_order_items') 34 | description: Total expenses from all orders 35 | expression: supplier_cost 36 | 37 | - <<: *finance-metric 38 | name: total_profit 39 | label: Total Profit 40 | description: Total profit from all orders 41 | calculation_method: derived 42 | model: # Derived metrics cannot have a 'model' property 43 | expression: "{{ metric('total_revenue') }} - {{ metric('total_expenses') }}" 44 | 45 | - <<: *finance-metric 46 | name: average_revenue_per_customer 47 | label: Average Revenue Per Customer 48 | description: The average revenue received per customer 49 | calculation_method: derived 50 | model: # Derived metrics cannot have a 'model' property 
51 | expression: "{{ metric('total_revenue') }} / {{ metric('total_customers') }}" 52 | -------------------------------------------------------------------------------- /models/ml/forecast_score_py.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from prophet import Prophet 4 | from prophet.serialize import model_from_json 5 | 6 | 7 | def model(dbt, session): 8 | 9 | # dbt configuration 10 | dbt.config(packages=['pandas', 'prophet'], snowflake_warehouse='SNOWPARK_WH') 11 | 12 | # get trained ML models 13 | # TODO: filter by trained_at to last X days or something 14 | models = dbt.ref('forecast_train_py').to_pandas() 15 | 16 | # get most recent trained_at 17 | most_recent_trained_at = models['trained_at'].max() 18 | 19 | # filter models by most recent trained_at 20 | models = models[models['trained_at'] == most_recent_trained_at] 21 | 22 | # get list of unique locations dynamically 23 | locations = sorted(list(models['location'].unique())) 24 | 25 | # hydrate models as Prophet objects 26 | models = { 27 | location: model_from_json( 28 | models[models['location'] == location]['model'].iloc[0] 29 | ) 30 | for location in locations 31 | } 32 | 33 | # create future dataframe to forecast on 34 | future = models[locations[0]].make_future_dataframe(periods=52 * 3, freq='W') 35 | 36 | # score model per location 37 | forecasts = {location: models[location].predict(future) for location in locations} 38 | 39 | # dataframe magic (use location to filter forecasts from single table) 40 | for location, forecast in forecasts.items(): 41 | forecast['location'] = location 42 | 43 | # create a single dataframe to return 44 | df = pd.concat(forecasts.values()) 45 | 46 | return df -------------------------------------------------------------------------------- /models/ml/forecast_train_py.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from datetime 
def model(dbt, session):
    """Train one Prophet model per ship-mode 'location' on weekly revenue.

    Returns a dataframe with one row per location containing the training
    timestamp and the JSON-serialized model; the incremental materialization
    appends each training batch so older model versions are retained.
    """
    # dbt configuration
    dbt.config(
        materialized='incremental',
        packages=['pandas', 'prophet'],
        snowflake_warehouse='SNOWPARK_WH',
    )

    # get upstream data
    revenue = dbt.ref('revenue_weekly_by_ship_mode').to_pandas()

    # Snowflake returns upper-cased column names; rename to Prophet's expected
    # 'ds' (timestamp) and 'y' (value), plus a 'location' grouping column.
    renames = {
        'DATE_WEEK': 'ds',
        'SHIP_MODE': 'location',
        'TOTAL_REVENUE': 'y',
    }
    revenue = revenue.rename(columns=renames)

    # unique locations, sorted for deterministic output ordering
    locations = sorted(revenue['location'].unique())

    # train one Prophet model per location on that location's rows
    models = [
        Prophet().fit(revenue[revenue['location'] == location])
        for location in locations
    ]

    # a single timestamp 'versions' every model in this training batch
    trained_at = datetime.now()

    # persist models -- serialize each Prophet model as JSON via the
    # provided helper so it can round-trip through the warehouse
    return pd.DataFrame(
        {
            'trained_at': [trained_at] * len(locations),
            'location': locations,
            'model': [model_to_json(m) for m in models],
        }
    )
for more info, please check [codegen](https://hub.getdbt.com/dbt-labs/codegen/latest/)
/models/staging/segment/dbtc/stg_dbtc__link_clicked.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('dbtc', 'link_clicked') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | id, 11 | original_timestamp, 12 | received_at, 13 | timestamp, 14 | anonymous_id, 15 | context_ip, 16 | context_user_agent, 17 | link, 18 | context_library_version, 19 | event, 20 | event_text, 21 | uuid_ts, 22 | context_page_title, 23 | context_page_url, 24 | sent_at, 25 | context_library_name, 26 | context_locale, 27 | context_page_path, 28 | context_page_referrer 29 | 30 | from source 31 | 32 | ) 33 | 34 | select * from renamed 35 | -------------------------------------------------------------------------------- /models/staging/segment/dbtc/stg_dbtc__pages.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('dbtc', 'pages') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | anonymous_id, 11 | context_locale, 12 | context_page_referrer, 13 | url, 14 | uuid_ts, 15 | context_library_name, 16 | context_library_version, 17 | id, 18 | received_at, 19 | title, 20 | context_page_title, 21 | path, 22 | referrer, 23 | timestamp, 24 | original_timestamp, 25 | sent_at, 26 | context_ip, 27 | context_page_path, 28 | context_page_url, 29 | context_user_agent, 30 | 31 | -- calculated 32 | {{ dbt_utils.get_url_host('url') }} as page_url_host, 33 | replace( 34 | {{ dbt_utils.get_url_host('referrer') }}, 35 | 'www.', 36 | '' 37 | ) as referrer_host, 38 | {{ dbt_utils.get_url_parameter('url', 'gclid') }} as gclid, 39 | case 40 | when lower(context_user_agent) like '%android%' then 'Android' 41 | else replace( 42 | {{ dbt.split_part(dbt.split_part('context_user_agent', "'('", 2), "' '", 1) }}, 43 | ';', '') 44 | end as device, 45 | case 46 | when device = 'iPhone' then 'iPhone' 47 | when device = 'Android' then 'Android' 48 | 
when device in ('iPad', 'iPod') then 'Tablet' 49 | when device in ('Windows', 'Macintosh', 'X11') then 'Desktop' 50 | else 'Uncategorized' 51 | end as device_category 52 | 53 | from source 54 | 55 | ) 56 | 57 | select * from renamed 58 | -------------------------------------------------------------------------------- /models/staging/segment/dbtc/stg_dbtc__tracks.sql: -------------------------------------------------------------------------------- 1 | 2 | with source as ( 3 | 4 | select * from {{ source('dbtc', 'tracks') }} 5 | 6 | ), 7 | 8 | renamed as ( 9 | 10 | select 11 | event_text, 12 | context_library_version, 13 | context_page_referrer, 14 | context_page_url, 15 | event, 16 | context_ip, 17 | context_page_path, 18 | context_page_title, 19 | received_at, 20 | sent_at, 21 | timestamp, 22 | uuid_ts, 23 | context_locale, 24 | context_user_agent, 25 | id, 26 | original_timestamp, 27 | anonymous_id, 28 | context_library_name 29 | 30 | from source 31 | 32 | ) 33 | 34 | select * from renamed 35 | -------------------------------------------------------------------------------- /models/staging/segment/yahooquery/stg_yahooquery__link_clicked.sql: -------------------------------------------------------------------------------- 1 | 2 | with source as ( 3 | 4 | select * from {{ source('yahooquery', 'link_clicked') }} 5 | 6 | ), 7 | 8 | renamed as ( 9 | 10 | select 11 | id, 12 | original_timestamp, 13 | received_at, 14 | timestamp, 15 | anonymous_id, 16 | context_ip, 17 | context_user_agent, 18 | link, 19 | context_library_version, 20 | event, 21 | event_text, 22 | uuid_ts, 23 | context_page_title, 24 | context_page_url, 25 | sent_at, 26 | context_library_name, 27 | context_locale, 28 | context_page_path, 29 | context_page_referrer 30 | 31 | from source 32 | 33 | ) 34 | 35 | select * from renamed 36 | -------------------------------------------------------------------------------- /models/staging/segment/yahooquery/stg_yahooquery__pages.sql: 
-------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('yahooquery', 'pages') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | anonymous_id, 11 | context_locale, 12 | context_page_referrer, 13 | url, 14 | uuid_ts, 15 | context_library_name, 16 | context_library_version, 17 | id, 18 | received_at, 19 | title, 20 | context_page_title, 21 | path, 22 | referrer, 23 | timestamp, 24 | original_timestamp, 25 | sent_at, 26 | context_ip, 27 | context_page_path, 28 | context_page_url, 29 | context_user_agent, 30 | 31 | -- calculated 32 | {{ dbt_utils.get_url_host('url') }} as page_url_host, 33 | replace( 34 | {{ dbt_utils.get_url_host('referrer') }}, 35 | 'www.', 36 | '' 37 | ) as referrer_host, 38 | {{ dbt_utils.get_url_parameter('url', 'gclid') }} as gclid, 39 | case 40 | when lower(context_user_agent) like '%android%' then 'Android' 41 | else replace( 42 | {{ dbt.split_part(dbt.split_part('context_user_agent', "'('", 2), "' '", 1) }}, 43 | ';', '') 44 | end as device, 45 | case 46 | when device = 'iPhone' then 'iPhone' 47 | when device = 'Android' then 'Android' 48 | when device in ('iPad', 'iPod') then 'Tablet' 49 | when device in ('Windows', 'Macintosh', 'X11') then 'Desktop' 50 | else 'Uncategorized' 51 | end as device_category 52 | 53 | from source 54 | 55 | ) 56 | 57 | select * from renamed 58 | -------------------------------------------------------------------------------- /models/staging/segment/yahooquery/stg_yahooquery__tracks.sql: -------------------------------------------------------------------------------- 1 | 2 | with source as ( 3 | 4 | select * from {{ source('yahooquery', 'tracks') }} 5 | 6 | ), 7 | 8 | renamed as ( 9 | 10 | select 11 | event_text, 12 | context_library_version, 13 | context_page_referrer, 14 | context_page_url, 15 | event, 16 | context_ip, 17 | context_page_path, 18 | context_page_title, 19 | received_at, 20 | sent_at, 21 | timestamp, 22 | uuid_ts, 23 | 
{% docs commit_date %} the date the order item is being committed {% enddocs %}
40 | 41 | {% docs phone_number %} phone number of the supplier {% enddocs %} 42 | 43 | {% docs account_balance %} raw account balance {% enddocs %} 44 | 45 | # the below are descriptions from stg_tpch_parts 46 | 47 | {% docs retail_price %} raw retail price {% enddocs %} 48 | 49 | # the below are descriptions from stg_tpch_part_suppliers 50 | 51 | {% docs available_quantity %} raw available quantity {% enddocs %} 52 | 53 | {% docs cost %} raw cost {% enddocs %} 54 | -------------------------------------------------------------------------------- /models/staging/tpch/_tpch__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_tpch_customers 5 | description: staging layer for customers data 6 | columns: 7 | - name: customer_key 8 | description: primary key of the model 9 | tests: 10 | - unique 11 | - not_null 12 | - name: name 13 | description: customer id 14 | - name: address 15 | description: address of the customer 16 | - name: nation_key 17 | description: foreign key to stg_tpch_nations 18 | - name: phone_number 19 | description: phone number of the customer 20 | - name: account_balance 21 | description: total account balance from the customer 22 | - name: market_segment 23 | description: market segment of the customer 24 | - name: comment 25 | description: '{{ doc("comment") }}' 26 | meta: 27 | joins: 28 | - join: stg_tpch_nations 29 | sql_on: ${stg_tpch_customers.nation_key} = ${stg_tpch_nations.nation_key} 30 | 31 | - name: stg_tpch_line_items 32 | description: staging layer for line items data 33 | columns: 34 | - name: order_item_key 35 | description: '{{ doc("order_item_key") }}' 36 | tests: 37 | - unique 38 | - not_null 39 | - name: order_key 40 | description: foreign key to stg_tpch_orders 41 | - name: part_key 42 | description: foreign key to stg_tpch_part_suppliers 43 | - name: supplier_key 44 | description: foreign key to stg_tpch_suppliers 45 | - name: line_number 
46 | description: '{{ doc("line_number") }}' 47 | - name: quantity 48 | description: total units 49 | - name: extended_price 50 | description: '{{ doc("extended_price") }}' 51 | - name: discount_percentage 52 | description: '{{ doc("discount_percentage") }}' 53 | - name: tax_rate 54 | description: tax rate of the order item 55 | - name: return_flag 56 | description: '{{ doc("return_flag") }}' 57 | - name: status_code 58 | description: status code of the order item 59 | - name: ship_date 60 | description: '{{ doc("ship_date") }}' 61 | - name: commit_date 62 | description: '{{ doc("commit_date") }}' 63 | - name: receipt_date 64 | description: '{{ doc("receipt_date") }}' 65 | - name: ship_instructions 66 | description: additional instructions on the shipment 67 | - name: ship_mode 68 | description: '{{ doc("ship_mode") }}' 69 | - name: comment 70 | description: '{{ doc("comment") }}' 71 | 72 | - name: stg_tpch_nations 73 | description: staging layer for nations data 74 | columns: 75 | - name: nation_key 76 | description: primary key of the model 77 | tests: 78 | - unique 79 | - not_null 80 | - name: name 81 | description: nation name 82 | - name: region_key 83 | description: foreign key to stg_tpch_regions 84 | - name: comment 85 | description: additional commentary 86 | meta: 87 | joins: 88 | - join: stg_tpch_regions 89 | sql_on: ${stg_tpch_nations.region_key} = ${stg_tpch_regions.region_key} 90 | 91 | - name: stg_tpch_orders 92 | description: staging layer for orders data 93 | columns: 94 | - name: order_key 95 | description: primary key of the model 96 | tests: 97 | - unique 98 | - not_null 99 | - name: customer_key 100 | description: foreign key to stg_tpch_customers 101 | - name: status_code 102 | description: status of the order 103 | - name: total_price 104 | description: raw price 105 | - name: order_date 106 | description: date the order was made 107 | - name: priority_code 108 | description: code associated with the order 109 | - name: clerk_name 110 | 
description: id of the clerk 111 | - name: ship_priority 112 | description: numeric representation of the shipping priority, zero being the default 113 | - name: comment 114 | description: '{{ doc("comment") }}' 115 | 116 | - name: stg_tpch_part_suppliers 117 | description: staging layer for suppliers data 118 | columns: 119 | - name: part_supplier_key 120 | description: surrogate key for the model -- combo of ps_partkey + ps_suppkey 121 | tests: 122 | - unique 123 | - not_null 124 | - name: part_key 125 | description: foreign key to stg_tpch_parts 126 | - name: supplier_key 127 | description: foreign key to stg_tpch_suppliers 128 | - name: available_quantity 129 | description: '{{ doc("available_quantity") }}' 130 | - name: cost 131 | description: '{{ doc("cost") }}' 132 | - name: comment 133 | description: '{{ doc("comment") }}' 134 | meta: 135 | joins: 136 | - join: stg_tpch_suppliers 137 | sql_on: ${stg_tpch_part_suppliers.supplier_key} = ${stg_tpch_suppliers.supplier_key} 138 | 139 | - name: stg_tpch_parts 140 | description: staging layer for parts data 141 | columns: 142 | - name: part_key 143 | description: primary key of the model 144 | tests: 145 | - unique 146 | - not_null 147 | - name: name 148 | description: name of the part 149 | - name: manufacturer 150 | description: manufacturer of the part 151 | - name: brand 152 | description: brand of the part 153 | - name: type 154 | description: type of part including material 155 | - name: size 156 | description: size of the part 157 | - name: container 158 | description: container of the part 159 | - name: retail_price 160 | description: '{{ doc("retail_price") }}' 161 | - name: comment 162 | description: '{{ doc("comment") }}' 163 | 164 | - name: stg_tpch_regions 165 | description: staging layer for regions data 166 | columns: 167 | - name: region_key 168 | description: primary key of the model 169 | tests: 170 | - unique 171 | - not_null 172 | - name: name 173 | description: region name 174 | - name: 
comment 175 | description: '{{ doc("comment") }}' 176 | 177 | - name: stg_tpch_suppliers 178 | description: staging layer for suppliers data 179 | columns: 180 | - name: supplier_key 181 | description: primary key of the model 182 | tests: 183 | - unique 184 | - not_null 185 | - name: supplier_name 186 | description: '{{ doc("supplier_name") }}' 187 | - name: supplier_address 188 | description: '{{ doc("supplier_address") }}' 189 | - name: nation_key 190 | description: foreign key to stg_tpch_nations 191 | - name: phone_number 192 | description: '{{ doc("phone_number") }}' 193 | - name: account_balance 194 | description: '{{ doc("account_balance") }}' 195 | - name: comment 196 | description: '{{ doc("comment") }}' 197 | meta: 198 | joins: 199 | - join: stg_tpch_nations 200 | sql_on: ${stg_tpch_suppliers.nation_key} = ${stg_tpch_nations.nation_key} 201 | -------------------------------------------------------------------------------- /models/staging/tpch/_tpch__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | 4 | sources: 5 | - name: tpch 6 | description: '{{ doc("tpch_source") }}' 7 | database: doug_demo_v2 8 | schema: tpch 9 | freshness: 10 | warn_after: {count: 24, period: hour} 11 | error_after: {count: 48, period: hour} 12 | loaded_at_field: _etl_updated_timestamp 13 | tables: 14 | - name: orders 15 | description: main order tracking table 16 | columns: 17 | - name: o_orderkey 18 | description: SF*1,500,000 are sparsely populated 19 | tests: 20 | - not_null 21 | - unique 22 | 23 | - name: o_custkey 24 | description: Foreign Key to C_CUSTKEY 25 | tests: 26 | - relationships: 27 | to: source('tpch', 'customer') 28 | field: c_custkey 29 | - name: o_orderstatus 30 | description: status code of the order 31 | - name: o_totalprice 32 | description: raw price 33 | - name: o_orderdate 34 | description: date the order was made 35 | - name: o_orderpriority 36 | description: code associated with the order 
37 | - name: o_clerk 38 | description: id of the clerk 39 | - name: o_shippriority 40 | description: numeric representation of the shipping priority, zero being the default 41 | - name: o_comment 42 | description: '{{ doc("comment") }}' 43 | 44 | - name: customer 45 | description: main customer table 46 | columns: 47 | - name: c_custkey 48 | description: unique customer key 49 | tests: 50 | - not_null 51 | - unique 52 | - name: c_name 53 | description: customer id 54 | - name: c_address 55 | description: address of the customer 56 | - name: c_nationkey 57 | description: foreign key to stg_tpch_nations 58 | tests: 59 | - relationships: 60 | to: source('tpch', 'nation') 61 | field: n_nationkey 62 | - name: c_phone 63 | description: phone number of the customer 64 | - name: c_acctbal 65 | description: raw account balance 66 | - name: c_mktsegment 67 | description: market segment of the customer 68 | - name: c_comment 69 | description: '{{ doc("comment") }}' 70 | 71 | - name: lineitem 72 | description: main lineitem table 73 | columns: 74 | - name: l_orderkey 75 | description: Foreign Key to O_ORDERKEY 76 | tests: 77 | - relationships: 78 | to: source('tpch', 'orders') 79 | field: o_orderkey 80 | - name: l_partkey 81 | description: Foreign key to P_PARTKEY, first part of the compound Foreign Key to (PS_PARTKEY,PS_SUPPKEY) with L_SUPPKEY 82 | tests: 83 | - relationships: 84 | to: source('tpch', 'part') 85 | field: p_partkey 86 | - name: l_suppkey 87 | description: Foreign key to S_SUPPKEY, second part of the compound Foreign Key to (PS_PARTKEY, PS_SUPPKEY) with L_PARTKEY 88 | tests: 89 | - relationships: 90 | to: source('tpch', 'supplier') 91 | field: s_suppkey 92 | - name: l_linenumber 93 | description: sequence of the order items within the order 94 | - name: l_quantity 95 | description: total units 96 | - name: l_extendedprice 97 | description: line item price 98 | - name: l_discount 99 | description: percentage of the discount 100 | - name: l_tax 101 | description: 
description: the date the order item is being committed
part supplier table 170 | columns: 171 | - name: ps_partkey 172 | description: Foreign Key to P_PARTKEY 173 | tests: 174 | - relationships: 175 | to: source('tpch', 'part') 176 | field: p_partkey 177 | - name: ps_suppkey 178 | description: Foreign Key to S_SUPPKEY 179 | tests: 180 | - relationships: 181 | to: source('tpch', 'supplier') 182 | field: s_suppkey 183 | - name: ps_availqty 184 | description: raw available quantity 185 | - name: ps_supplycost 186 | description: raw cost 187 | - name: ps_comment 188 | description: '{{ doc("comment") }}' 189 | 190 | - name: region 191 | description: region mapping 192 | freshness: null 193 | columns: 194 | - name: r_regionkey 195 | description: 5 regions are populated 196 | tests: 197 | - not_null 198 | - unique 199 | - name: r_name 200 | description: region name 201 | tests: 202 | - accepted_values: 203 | values: ['AFRICA', 'AMERICA', 'ASIA', 'EUROPE', 'MIDDLE EAST'] 204 | - name: r_comment 205 | description: '{{ doc("comment") }}' 206 | 207 | - name: supplier 208 | description: main supplier table 209 | freshness: null 210 | columns: 211 | - name: s_suppkey 212 | description: SF*10,000 are populated 213 | tests: 214 | - not_null 215 | - unique 216 | - name: s_name 217 | description: id of the supplier 218 | - name: s_address 219 | description: address of the supplier 220 | - name: s_nationkey 221 | description: Foreign Key to N_NATIONKEY 222 | tests: 223 | - relationships: 224 | to: source('tpch', 'nation') 225 | field: n_nationkey 226 | - name: s_phone 227 | description: phone number of the supplier 228 | - name: s_acctbal 229 | description: raw account balance 230 | - name: s_comment 231 | description: '{{ doc("comment") }}' 232 | 233 | - name: tpch_snapshot 234 | database: analytics 235 | schema: dbt_mwinkler 236 | tables: 237 | - name: customer_snapshot_src 238 | description: slowly changing dimension table 239 | columns: 240 | - name: c_custkey 241 | description: unique customer key 242 | tests: 243 | - unique: 244 | 
severity: warn 245 | - not_null: 246 | severity: warn 247 | - name: c_name 248 | - name: c_address 249 | - name: c_nationkey 250 | - name: c_phone 251 | - name: c_acctbal 252 | - name: c_mktsegment 253 | - name: c_comment 254 | -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_customers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'customer') }} 4 | 5 | ), 6 | 7 | cleanup as ( 8 | 9 | select 10 | 11 | c_custkey as customer_key, 12 | c_name as name, 13 | c_address as address, 14 | c_nationkey as nation_key, 15 | c_phone as phone_number, 16 | c_acctbal as account_balance, 17 | c_mktsegment as market_segment, 18 | c_comment as comment, 19 | 'hello world' as col 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from cleanup 26 | -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_line_items.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'lineitem') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | {{ dbt_utils.generate_surrogate_key( 12 | ['l_orderkey', 13 | 'l_linenumber']) }} 14 | as order_item_key, 15 | l_orderkey as order_key, 16 | l_partkey as part_key, 17 | l_suppkey as supplier_key, 18 | l_linenumber as line_number, 19 | l_quantity as quantity, 20 | l_extendedprice as extended_price, 21 | l_discount as discount_percentage, 22 | l_tax as tax_rate, 23 | l_returnflag as return_flag, 24 | l_linestatus as status_code, 25 | l_shipdate as ship_date, 26 | l_commitdate as commit_date, 27 | l_receiptdate as receipt_date, 28 | l_shipinstruct as ship_instructions, 29 | l_shipmode as ship_mode, 30 | l_comment as comment 31 | 32 | from source 33 | 34 | ) 35 | 36 | select * from renamed 
-------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_nations.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'nation') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | n_nationkey as nation_key, 12 | n_name as name, 13 | n_regionkey as region_key, 14 | n_comment as comment 15 | 16 | from source 17 | 18 | ) 19 | 20 | select * from renamed -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_orders.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'orders') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | o_orderkey as order_key, 12 | o_custkey as customer_key, 13 | o_orderstatus as status_code, 14 | o_totalprice as total_price, 15 | o_orderdate as order_date, 16 | o_orderpriority as priority_code, 17 | o_clerk as clerk_name, 18 | o_shippriority as ship_priority, 19 | o_comment as comment 20 | 21 | 22 | from source 23 | 24 | ) 25 | 26 | select * from renamed 27 | -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_part_suppliers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'partsupp') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | {{ dbt_utils.generate_surrogate_key( 12 | ['ps_partkey', 13 | 'ps_suppkey']) }} 14 | as part_supplier_key, 15 | ps_partkey as part_key, 16 | ps_suppkey as supplier_key, 17 | ps_availqty as available_quantity, 18 | ps_supplycost as cost, 19 | ps_comment as comment 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed -------------------------------------------------------------------------------- 
/models/staging/tpch/stg_tpch_parts.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'part') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | p_partkey as part_key, 12 | p_name as name, 13 | p_mfgr as manufacturer, 14 | p_brand as brand, 15 | p_type as type, 16 | p_size as size, 17 | p_container as container, 18 | p_retailprice as retail_price, 19 | p_comment as comment 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_regions.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'region') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | r_regionkey as region_key, 11 | r_name as name, 12 | r_comment as comment 13 | 14 | from source 15 | 16 | ) 17 | 18 | select * from renamed -------------------------------------------------------------------------------- /models/staging/tpch/stg_tpch_suppliers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('tpch', 'supplier') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | 11 | s_suppkey as supplier_key, 12 | s_name as supplier_name, 13 | s_address as supplier_address, 14 | s_nationkey as nation_key, 15 | s_phone as phone_number, 16 | s_acctbal as account_balance, 17 | s_comment as comment 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed -------------------------------------------------------------------------------- /models/test_ads/fct_facebook_ads.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('int_facebook_ads_agg') }} -------------------------------------------------------------------------------- 
description: This model is an example of how to create a date spine using dbt_utils. Rather than writing a bunch of code to create this, we're able to do it in three lines of code.
#!/usr/bin/env python
"""Print a dbt profiles.yml document to stdout, built from SF_* env vars.

Connection details with no default (account, user, password) fall back to
None when the corresponding environment variable is unset.
"""
import os
import yaml


if __name__ == '__main__':

    env = os.getenv

    # Snowflake connection block for the single 'prod' output.
    warehouse_config = {
        'type': 'snowflake',
        'account': env('SF_ACCOUNT'),
        'user': env('SF_USER'),
        'password': env('SF_PASSWORD'),
        'role': env('SF_ROLE', 'TRANSFORMER'),
        'database': env('SF_DATABASE', 'DOUG_DEMO_V2'),
        'schema': env('SF_SCHEMA', 'dbt_dguthrie'),
        'warehouse': env('SF_WAREHOUSE', 'TRANSFORMING'),
    }

    target_name = 'prod'

    # Profile name 'tpch' must match the profile key in dbt_project.yml.
    profile_config = {
        'tpch': {
            'outputs': {target_name: warehouse_config},
            'target': target_name,
        },
    }

    print(yaml.dump(profile_config))
"""Trigger every dbt Cloud CI job in the account for one pull request,
poll until all triggered runs finish, and post a PR comment (then exit
non-zero) if any run errored or was cancelled.
"""

# stdlib
import os
import sys
import time

# third party
import requests
from dbtc import dbtCloudClient


# Inputs
CI_JOB_NAMING_CONVENTION = 'CI Job'
CAUSE = 'Multiple CI Trigger'
POLLING_INTERVAL = 10

# Used in payload
GITHUB_PULL_REQUEST_ID = os.getenv('GITHUB_PULL_REQUEST_ID', None)
GIT_SHA = os.getenv('GIT_SHA', None)
ACCOUNT_ID = os.getenv('DBT_CLOUD_ACCOUNT_ID', None)

# Needed in list_runs endpoint
COMPLETED_STATUSES = ['success', 'error', 'cancelled']

# If an error is encountered
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
PR_COMMENT_URL = os.getenv('PR_COMMENT_URL')


if __name__ == '__main__':

    # Initialize client (reads its token from the environment)
    client = dbtCloudClient()

    # Get all the CI jobs across every project; the list endpoint pages
    # 100 jobs at a time, so walk offsets until total_count is covered.
    jobs = client.cloud.list_jobs(ACCOUNT_ID)
    total_job_count = jobs['extra']['pagination']['total_count']
    all_jobs = jobs['data']

    for offset in range(100, total_job_count, 100):
        jobs = client.cloud.list_jobs(ACCOUNT_ID, offset=offset)
        all_jobs.extend(jobs['data'])

    # CI jobs are identified purely by naming convention (case-insensitive).
    ci_jobs = [
        job for job in all_jobs
        if CI_JOB_NAMING_CONVENTION.lower() in job['name'].lower()
    ]

    # Standard payload shared by every trigger; schema_override is added
    # per job below.
    payload = {
        'cause': CAUSE,
        'git_sha': GIT_SHA,
        'github_pull_request_id': GITHUB_PULL_REQUEST_ID,
    }

    # IDs of the runs we kicked off and are still waiting on.
    run_ids = []

    # Trigger all the CI jobs without polling; we poll them together below.
    for ci_job in ci_jobs:
        schema_override = f'dbt_cloud_pr_{ci_job["id"]}_{GITHUB_PULL_REQUEST_ID}'
        payload.update({'schema_override': schema_override})
        run = client.cloud.trigger_job(
            ACCOUNT_ID, ci_job['id'], payload, should_poll=False
        )
        run_ids.append(run['data']['id'])

    # Buckets of completed runs keyed by humanized status.
    completed_dict = {s: [] for s in COMPLETED_STATUSES}

    # Poll until all runs are in a "completed" state (success, error,
    # cancelled). NOTE(review): list_runs only returns the most recent page
    # of completed runs; a very long-running run could scroll off that page
    # on a busy account -- confirm page size is sufficient.
    while True:
        time.sleep(POLLING_INTERVAL)
        completed_runs = client.cloud.list_runs(
            ACCOUNT_ID,
            order_by='-id',
            status=COMPLETED_STATUSES,
            include_related=['job'],
        )['data']
        completed_runs_dict = {r['id']: r for r in completed_runs}
        completed_runs_ids = completed_runs_dict.keys()
        # Iterate over a copy since we remove from run_ids while looping.
        for pending_run_id in run_ids[:]:
            if pending_run_id in completed_runs_ids:
                run = completed_runs_dict[pending_run_id]
                completed_dict[run['status_humanized'].lower()].append(run)
                run_ids.remove(pending_run_id)
        if len(run_ids) == 0:
            break

    if completed_dict['error'] or completed_dict['cancelled']:
        # Build a markdown comment summarizing every failed/cancelled run.
        message = '# dbt Cloud Run Failures\n'
        for status in ['error', 'cancelled']:
            # Skip empty sections so the comment only lists real failures.
            if not completed_dict[status]:
                continue
            message += f'### The following jobs completed with a status of {status}:\n'
            for run in completed_dict[status]:
                job_name = run['job']['name']
                href = run['href']
                failed_run_id = run['id']
                message += f'- **{job_name}** failed for [Run #{failed_run_id}]({href})\n'
        payload = {'body': message}
        headers = {'Authorization': f'Bearer {GITHUB_TOKEN}'}
        response = requests.post(PR_COMMENT_URL, json=payload, headers=headers)
        # Surface comment-posting failures instead of silently ignoring them;
        # still exit non-zero either way because the CI runs failed.
        if not response.ok:
            print(
                f'Failed to post PR comment ({response.status_code}): '
                f'{response.text}'
            )
        sys.exit(1)

    else:
        sys.exit(0)
"""Trigger a dbt Cloud job, wait for it to finish, then render an ERD image
per schema from the resulting Snowflake tables.
"""

# stdlib
import os

# third party
from dbtc import dbtCloudClient as dbtc
from eralchemy import render_er
from snowflake.sqlalchemy import URL


SCHEMAS = ['TPCH']


if __name__ == '__main__':

    account_id = os.getenv('DBT_CLOUD_ACCOUNT_ID')
    job_id = os.getenv('DBT_CLOUD_JOB_ID')

    # Consistent with scripts/create_profile.py: read connection settings
    # from SF_* env vars, falling back to the same defaults used there.
    database = os.getenv('SF_DATABASE', 'DOUG_DEMO_V2')
    warehouse = os.getenv('SF_WAREHOUSE', 'TRANSFORMING')
    role = os.getenv('SF_ROLE', 'TRANSFORMER')
    # Project id was previously hard-coded in the run URL; overridable now.
    project_id = os.getenv('DBT_CLOUD_PROJECT_ID', '88168')

    # Initialize dbtCloudClient with appropriate tokens
    client = dbtc(
        service_token=os.getenv('DBT_CLOUD_SERVICE_TOKEN'),
        api_key=os.getenv('DBT_CLOUD_API_KEY')
    )

    # Trigger Job and Poll until successful.
    # NOTE(review): confirm trigger_job_and_poll returns a bare run id for
    # the pinned dbtc version; if it returns the full run payload, the URL
    # below needs run['data']['id'] instead.
    run_id = client.cloud.trigger_job_and_poll(
        account_id, job_id, {'cause': 'Triggered via GH actions'}
    )

    print(f'View run here: https://cloud.getdbt.com/#/accounts/{account_id}/projects/{project_id}/runs/{run_id}/')

    # Render one ERD image per schema from the freshly built tables.
    for schema in SCHEMAS:
        url = URL(
            account=os.getenv('SF_ACCOUNT'),
            user=os.getenv('SF_USER'),
            password=os.getenv('SF_PASSWORD'),
            database=database,
            schema=schema,
            warehouse=warehouse,
            role=role,
        )
        render_er(str(url), f'assets/{schema.lower()}_erd.png')

    print(f'View docs here: https://cloud.getdbt.com/#/accounts/{account_id}/jobs/{job_id}/#!/overview')
def chunker(seq, size=None):
    """Yield successive slices of ``seq``, each at most ``size`` items long.

    Datadog rejects log arrays larger than MAX_LIST_SIZE, so the default
    chunk size is MAX_LIST_SIZE; ``size`` is exposed as a parameter for
    reuse and testing (backward-compatible: omitting it keeps the old
    behavior).

    Args:
        seq: A sliceable sequence (e.g. a list of HTTPLogItem).
        size: Maximum chunk length; defaults to MAX_LIST_SIZE.

    Returns:
        A generator of consecutive slices of ``seq`` in original order.
        An empty ``seq`` yields nothing.
    """
    if size is None:
        size = MAX_LIST_SIZE
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
def send_logs(body: List[HTTPLogItem]):
    """Submit one batch of log items to Datadog's v2 logs intake.

    Args:
        body: Log items to send; callers must keep len(body) <= the
            Datadog array limit (see chunker / MAX_LIST_SIZE).

    Returns:
        The response object from LogsApi.submit_log.
    """
    # Wrap in HTTPLog rather than rebinding the parameter, so the
    # original argument stays inspectable while debugging.
    log = HTTPLog(body)
    # Configuration reads DD_API_KEY / DD_SITE from the environment.
    configuration = Configuration()
    with ApiClient(configuration) as api_client:
        api_instance = LogsApi(api_client)
        response = api_instance.submit_log(body=log, content_encoding='gzip')
    return response


if __name__ == '__main__':

    logs = []
    account_id = os.getenv('DBT_CLOUD_ACCOUNT_ID')
    job_id = os.getenv('DBT_CLOUD_JOB_ID')

    # Initialize client with a service token (no API key is needed here)
    client = dbtc(
        service_token=os.getenv('DBT_CLOUD_SERVICE_TOKEN'),
    )

    # Trigger the job; dbtc polls to completion by default (should_poll is
    # only disabled explicitly elsewhere). NOTE(review): confirm this default
    # for the pinned dbtc version -- the metadata queries below need a
    # finished run.
    run = client.cloud.trigger_job(
        account_id, job_id, {'cause': 'Triggered via GH actions'}
    )

    # Fail loudly with the full payload if the trigger response is not the
    # expected {'data': {'id': ...}} shape, instead of a cryptic KeyError.
    try:
        run_id = run['data']['id']
    except (TypeError, KeyError):
        raise RuntimeError(f'Unexpected trigger_job response: {run!r}')

    # Retrieve all resources defined above via metadata API and turn each
    # record into a Datadog log item tagged with its resource type.
    for resource in RESOURCES:
        method = f'get_{resource}'
        data = getattr(client.metadata, method)(
            job_id=job_id, run_id=run_id
        )['data'][resource]
        for datum in data:
            logs.append(HTTPLogItem(
                ddsource='python',
                ddtags=f'job:daily_job,resource:{resource}',
                hostname='cloud.getdbt.com',
                message=json.dumps(datum),
                service='gh_actions'
            ))

    # Ship in array-size-limited batches; an empty logs list sends nothing.
    for log_items in chunker(logs):
        send_logs(log_items)
| "Bonaire, Sint Eustatius and Saba",BQ 30 | Bosnia and Herzegovina,BA 31 | Botswana,BW 32 | Bouvet Island,BV 33 | Brazil,BR 34 | British Indian Ocean Territory,IO 35 | Brunei Darussalam,BN 36 | Bulgaria,BG 37 | Burkina Faso,BF 38 | Burundi,BI 39 | Cambodia,KH 40 | Cameroon,CM 41 | Canada,CA 42 | Cape Verde,CV 43 | Cayman Islands,KY 44 | Central African Republic,CF 45 | Chad,TD 46 | Chile,CL 47 | China,CN 48 | Christmas Island,CX 49 | Cocos (Keeling) Islands,CC 50 | Colombia,CO 51 | Comoros,KM 52 | Congo,CG 53 | "Congo, the Democratic Republic of the",CD 54 | Cook Islands,CK 55 | Costa Rica,CR 56 | Côte d'Ivoire,CI 57 | Croatia,HR 58 | Cuba,CU 59 | Curaçao,CW 60 | Cyprus,CY 61 | Czech Republic,CZ 62 | Denmark,DK 63 | Djibouti,DJ 64 | Dominica,DM 65 | Dominican Republic,DO 66 | Ecuador,EC 67 | Egypt,EG 68 | El Salvador,SV 69 | Equatorial Guinea,GQ 70 | Eritrea,ER 71 | Estonia,EE 72 | Ethiopia,ET 73 | Falkland Islands (Malvinas),FK 74 | Faroe Islands,FO 75 | Fiji,FJ 76 | Finland,FI 77 | France,FR 78 | French Guiana,GF 79 | French Polynesia,PF 80 | French Southern Territories,TF 81 | Gabon,GA 82 | Gambia,GM 83 | Georgia,GE 84 | Germany,DE 85 | Ghana,GH 86 | Gibraltar,GI 87 | Greece,GR 88 | Greenland,GL 89 | Grenada,GD 90 | Guadeloupe,GP 91 | Guam,GU 92 | Guatemala,GT 93 | Guernsey,GG 94 | Guinea,GN 95 | Guinea-Bissau,GW 96 | Guyana,GY 97 | Haiti,HT 98 | Heard Island and McDonald Islands,HM 99 | Holy See (Vatican City State),VA 100 | Honduras,HN 101 | Hong Kong,HK 102 | Hungary,HU 103 | Iceland,IS 104 | India,IN 105 | Indonesia,ID 106 | "Iran, Islamic Republic of",IR 107 | Iraq,IQ 108 | Ireland,IE 109 | Isle of Man,IM 110 | Israel,IL 111 | Italy,IT 112 | Jamaica,JM 113 | Japan,JP 114 | Jersey,JE 115 | Jordan,JO 116 | Kazakhstan,KZ 117 | Kenya,KE 118 | Kiribati,KI 119 | "Korea, Democratic People's Republic of",KP 120 | "Korea, Republic of",KR 121 | Kuwait,KW 122 | Kyrgyzstan,KG 123 | Lao People's Democratic Republic,LA 124 | Latvia,LV 125 | Lebanon,LB 126 | Lesotho,LS 
127 | Liberia,LR 128 | Libya,LY 129 | Liechtenstein,LI 130 | Lithuania,LT 131 | Luxembourg,LU 132 | Macao,MO 133 | "Macedonia, the Former Yugoslav Republic of",MK 134 | Madagascar,MG 135 | Malawi,MW 136 | Malaysia,MY 137 | Maldives,MV 138 | Mali,ML 139 | Malta,MT 140 | Marshall Islands,MH 141 | Martinique,MQ 142 | Mauritania,MR 143 | Mauritius,MU 144 | Mayotte,YT 145 | Mexico,MX 146 | "Micronesia, Federated States of",FM 147 | "Moldova, Republic of",MD 148 | Monaco,MC 149 | Mongolia,MN 150 | Montenegro,ME 151 | Montserrat,MS 152 | Morocco,MA 153 | Mozambique,MZ 154 | Myanmar,MM 155 | Namibia,NA 156 | Nauru,NR 157 | Nepal,NP 158 | Netherlands,NL 159 | New Caledonia,NC 160 | New Zealand,NZ 161 | Nicaragua,NI 162 | Niger,NE 163 | Nigeria,NG 164 | Niue,NU 165 | Norfolk Island,NF 166 | Northern Mariana Islands,MP 167 | Norway,NO 168 | Oman,OM 169 | Pakistan,PK 170 | Palau,PW 171 | "Palestine, State of",PS 172 | Panama,PA 173 | Papua New Guinea,PG 174 | Paraguay,PY 175 | Peru,PE 176 | Philippines,PH 177 | Pitcairn,PN 178 | Poland,PL 179 | Portugal,PT 180 | Puerto Rico,PR 181 | Qatar,QA 182 | Réunion,RE 183 | Romania,RO 184 | Russian Federation,RU 185 | Rwanda,RW 186 | Saint Barthélemy,BL 187 | "Saint Helena, Ascension and Tristan da Cunha",SH 188 | Saint Kitts and Nevis,KN 189 | Saint Lucia,LC 190 | Saint Martin (French part),MF 191 | Saint Pierre and Miquelon,PM 192 | Saint Vincent and the Grenadines,VC 193 | Samoa,WS 194 | San Marino,SM 195 | Sao Tome and Principe,ST 196 | Saudi Arabia,SA 197 | Senegal,SN 198 | Serbia,RS 199 | Seychelles,SC 200 | Sierra Leone,SL 201 | Singapore,SG 202 | Sint Maarten (Dutch part),SX 203 | Slovakia,SK 204 | Slovenia,SI 205 | Solomon Islands,SB 206 | Somalia,SO 207 | South Africa,ZA 208 | South Georgia and the South Sandwich Islands,GS 209 | South Sudan,SS 210 | Spain,ES 211 | Sri Lanka,LK 212 | Sudan,SD 213 | Suriname,SR 214 | Svalbard and Jan Mayen,SJ 215 | Swaziland,SZ 216 | Sweden,SE 217 | Switzerland,CH 218 | Syrian Arab Republic,SY 
219 | "Taiwan, Province of China",TW 220 | Tajikistan,TJ 221 | "Tanzania, United Republic of",TZ 222 | Thailand,TH 223 | Timor-Leste,TL 224 | Togo,TG 225 | Tokelau,TK 226 | Tonga,TO 227 | Trinidad and Tobago,TT 228 | Tunisia,TN 229 | Turkey,TR 230 | Turkmenistan,TM 231 | Turks and Caicos Islands,TC 232 | Tuvalu,TV 233 | Uganda,UG 234 | Ukraine,UA 235 | United Arab Emirates,AE 236 | United Kingdom,GB 237 | United States,US 238 | United States Minor Outlying Islands,UM 239 | Uruguay,UY 240 | Uzbekistan,UZ 241 | Vanuatu,VU 242 | "Venezuela, Bolivarian Republic of",VE 243 | Viet Nam,VN 244 | "Virgin Islands, British",VG 245 | "Virgin Islands, U.S.",VI 246 | Wallis and Futuna,WF 247 | Western Sahara,EH 248 | Yemen,YE 249 | Zambia,ZM 250 | Zimbabwe,ZW -------------------------------------------------------------------------------- /seeds/data.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | seeds: 4 | - name: snowflake_contract_rates 5 | description: > 6 | This is an example of a seed. 7 | Seeds are CSV files in your dbt project (typically in your data directory), that dbt can load into your data warehouse using the dbt seed command. 
8 | columns: 9 | - name: effective_date 10 | description: effective date of contract 11 | - name: rate 12 | description: rate based on the effective date 13 | 14 | - name: country_codes 15 | description: Lookup table for country and two digit ISO codes 16 | columns: 17 | - name: Name 18 | description: Name of the country 19 | - name: Code 20 | description: 2-digit ISO code for the country 21 | -------------------------------------------------------------------------------- /seeds/snowflake_contract_rates.csv: -------------------------------------------------------------------------------- 1 | effective_date,rate 2 | 2018-06-01,2.55 3 | 2019-08-01,2.48 4 | -------------------------------------------------------------------------------- /snapshots/tpch/tpch_customer_snapshot.sql: -------------------------------------------------------------------------------- 1 | {% snapshot tpch_customer_snapshot %} 2 | 3 | {{ config( 4 | target_database='doug_demo_v2', 5 | target_schema='snapshots', 6 | unique_key='c_custkey', 7 | strategy='timestamp', 8 | updated_at='_etl_updated_timestamp', 9 | )}} 10 | 11 | select * from {{ source('tpch', 'customer') }} 12 | 13 | {% endsnapshot %} -------------------------------------------------------------------------------- /snapshots/tpch/tpch_part_snapshot.sql: -------------------------------------------------------------------------------- 1 | {% snapshot tpch_part_snapshot %} 2 | 3 | {{ config( 4 | target_database='doug_demo_v2', 5 | target_schema='snapshots', 6 | unique_key='p_partkey', 7 | strategy='timestamp', 8 | updated_at='_etl_updated_timestamp', 9 | )}} 10 | 11 | select * from {{ source('tpch', 'part') }} 12 | 13 | {% endsnapshot %} -------------------------------------------------------------------------------- /snapshots/tpch/tpch_supplier_snapshot.sql: -------------------------------------------------------------------------------- 1 | {% snapshot tpch_supplier_snapshot %} 2 | 3 | {{ config( 4 | target_database='doug_demo_v2', 
5 | target_schema='snapshots', 6 | unique_key='s_suppkey', 7 | strategy='timestamp', 8 | updated_at='_etl_updated_timestamp', 9 | )}} 10 | 11 | select * from {{ source('tpch', 'supplier') }} 12 | 13 | {% endsnapshot %} -------------------------------------------------------------------------------- /tests/macro_stg_tpch_orders_assert_pos_price.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | enabled=true, 4 | severity='error', 5 | tags = ['finance'] 6 | ) 7 | }} 8 | 9 | 10 | {{ test_all_values_gte_zero('stg_tpch_orders', 'total_price') }} -------------------------------------------------------------------------------- /tests/macro_stg_tphc_suppliers_assert_pos_acct_bal.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | enabled=true, 4 | severity='warn', 5 | tags = ['finance'] 6 | ) 7 | }} 8 | 9 | 10 | {{ test_all_values_gte_zero('stg_tpch_suppliers', 'account_balance') }} -------------------------------------------------------------------------------- /tests/stg_tpch_orders_assert_positive_price.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | enabled=true, 4 | severity='error', 5 | tags = ['finance'] 6 | ) 7 | }} 8 | 9 | with orders as ( select * from {{ ref('stg_tpch_orders') }} ) 10 | 11 | select * 12 | from orders 13 | where total_price < 0 14 | --------------------------------------------------------------------------------