├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── publish-docs-main.yml │ ├── publish-docs-release.yml │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── dbt_loom ├── __init__.py ├── clients │ ├── az_blob.py │ ├── dbt_cloud.py │ ├── gcs.py │ ├── s3.py │ └── snowflake_stage.py ├── config.py ├── logging.py ├── manifests.py └── shims.py ├── docs ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── advanced-configuration.md ├── getting-started.md └── index.md ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── test_projects ├── customer_success │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── .sqlfluff │ ├── .sqlfluffignore │ ├── README.md │ ├── Taskfile.yml │ ├── analyses │ │ └── .gitkeep │ ├── dbt_loom.config.yml │ ├── dbt_project.yml │ ├── jaffle-data │ │ └── raw_customers.csv │ ├── macros │ │ ├── .gitkeep │ │ └── cents_to_dollars.sql │ ├── meltano.yml │ ├── models │ │ ├── marts │ │ │ ├── __models.yml │ │ │ ├── customer_status_histories.py │ │ │ └── customers.sql │ │ └── staging │ │ │ ├── __models.yml │ │ │ ├── __sources.yml │ │ │ └── stg_customers.sql │ ├── package-lock.yml │ ├── packages.yml │ ├── profiles.yml │ ├── reports │ │ ├── .evidence │ │ │ └── customization │ │ │ │ └── custom-formatting.json │ │ ├── .gitignore │ │ ├── README.md │ │ ├── package-lock.json │ │ ├── package.json │ │ └── pages │ │ │ ├── analysis │ │ │ └── seasonality-investigation.md │ │ │ ├── customers │ │ │ ├── [customer].md │ │ │ └── index.md │ │ │ ├── index.md │ │ │ └── stores │ │ │ ├── [city].md │ │ │ └── index.md │ ├── requirements.txt │ ├── snapshots │ │ └── .gitkeep │ └── tests │ │ └── .gitkeep └── revenue │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── .sqlfluff │ ├── .sqlfluffignore │ ├── README.md │ ├── Taskfile.yml │ ├── analyses │ └── .gitkeep │ ├── dbt_loom.config.yml │ ├── dbt_project.yml │ ├── jaffle-data │ ├── raw_items.csv │ ├── raw_orders.csv │ ├── raw_products.csv │ ├── raw_stores.csv │ └── raw_supplies.csv │ ├── macros │ ├── .gitkeep │ └── cents_to_dollars.sql │ ├── meltano.yml │ ├── models │ ├── groups.yml │ ├── marts │ │ ├── __models.yml │ │ ├── accounts.sql │ │ ├── orders_v1.sql │ │ └── orders_v2.sql │ └── staging │ │ ├── __models.yml │ │ ├── __sources.yml │ │ ├── stg_accounts.sql │ │ ├── stg_locations.sql │ │ ├── stg_order_items.sql │ │ ├── stg_orders.sql │ │ ├── stg_products.sql │ │ └── stg_supplies.sql │ ├── package-lock.yml │ ├── packages.yml │ ├── profiles.yml │ ├── reports │ ├── .evidence │ │ └── customization │ │ │ └── custom-formatting.json │ ├── .gitignore │ ├── README.md │ ├── package-lock.json │ ├── package.json │ └── pages │ │ ├── analysis │ │ └── seasonality-investigation.md │ │ ├── customers │ │ ├── [customer].md │ │ └── index.md │ │ ├── index.md │ │ └── stores │ │ ├── [city].md │ │ └── index.md │ ├── requirements.txt │ ├── seeds │ ├── __seeds.yml │ ├── integers.csv │ └── seed_accounts.csv │ ├── snapshots │ └── .gitkeep │ └── tests │ └── .gitkeep └── tests ├── __init__.py ├── test_dbt_core_execution.py ├── test_mainfest_node.py └── test_manifest_loaders.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: nicholasyager # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug, triage 6 | assignees: nicholasyager 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. dbt state 16 | 2. Run the <> command 17 | 3. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | - OS: [e.g. MacOS 14.2.1] 26 | - dbt-loom Version [e.g. 0.4.0] 27 | - dbt-core Version [e.g. 1.7.10] 28 | 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: nicholasyager 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs-main.yml: -------------------------------------------------------------------------------- 1 | name: Publish MkDocs on Main Branch 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.9 21 | 22 | - name: Install dependencies 23 | run: | 24 | python3 -m pip install --upgrade pip 25 | pip install poetry 26 | poetry install --with=docs 27 | 28 | - name: Deploy to GitHub Pages 29 | env: 30 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 31 | run: | 32 | git config --global user.name "GitHub Actions Bot" 33 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 34 | poetry run mike deploy --push --message "Deployed by GitHub Actions" main 35 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs-release.yml: -------------------------------------------------------------------------------- 1 | name: Publish MkDocs on Release 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.9 21 | 22 | - name: Install dependencies 23 | run: | 24 | python3 -m pip install --upgrade pip 25 | pip install poetry 26 | poetry install --with=docs 27 | 28 | - name: Extract Major and Minor 
Version 29 | run: | 30 | VERSION_TAG=${{ github.event.release.tag_name }} 31 | VERSION_TAG="${VERSION_TAG#v}" # Remove 'v' prefix if present 32 | MAJOR="${VERSION_TAG%%.*}" 33 | MINOR="${VERSION_TAG#*.}" 34 | MINOR="${MINOR%%.*}" 35 | MAJOR_MINOR_VERSION="${MAJOR}.${MINOR}" 36 | echo "MAJOR_MINOR_VERSION=${MAJOR_MINOR_VERSION}" >> $GITHUB_ENV 37 | 38 | - name: Deploy Updated Docs 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | run: | 42 | git config user.name "GitHub Actions" 43 | git config user.email "actions@github.com" 44 | poetry run mike deploy --push --message "Deploy docs for release ${{ github.event.release.tag_name }}" --update-alias $MAJOR_MINOR_VERSION latest 45 | poetry run mike set-default --push latest 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: PyPi Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release: 9 | environment: release 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | python-version: [3.11] 14 | poetry-version: [1.4.2] 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install poetry ${{ matrix.poetry-version }} 27 | run: | 28 | python -m ensurepip 29 | python -m pip install --upgrade pip 30 | python -m pip install poetry==${{ matrix.poetry-version }} 31 | 32 | - name: Install dependencies 33 | shell: bash 34 | run: python -m poetry install 35 | 36 | - name: Build 37 | run: | 38 | python -m poetry build 39 | 40 | - name: Publish 41 | env: 42 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }} 43 | run: | 44 | python -m poetry publish --skip-existing -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests on pull requests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test: 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | python-version: [3.11] 14 | poetry-version: [1.4.2] 15 | dbt-version: [1.6.0, 1.7.0, 1.8.0, 1.9.0b2] 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Install poetry ${{ matrix.poetry-version }} 28 | run: | 29 | python -m ensurepip 30 | python -m pip install --upgrade pip 31 | python -m pip install poetry==${{ matrix.poetry-version }} 32 | 33 | - name: Install dependencies 34 | shell: bash 35 | run: python -m poetry install --with=dev 36 | 37 | - name: Install dbt-core 38 | shell: bash 39 | run: python -m poetry add dbt-core~=${{ matrix.dbt-version }} --allow-prereleases 40 | 41 | - name: Test 42 | run: | 43 | python -m poetry run pytest 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | venv/ 3 | venv-*/ 4 | dist/ 5 | 6 | __pycache__/ 7 | .mypy_cache/ 8 | .pytest_cache/ 9 | 10 | .idea/ 11 | .vscode/ 12 | 13 | */target/ 14 | */dbt_packages/ 15 | */logs/ 16 | logs/ 17 | *.duckdb 18 | *.duckdb.wal 19 
| *.user.yml 20 | *.db 21 | 22 | reports/sources/*.csv 23 | 24 | .meltano 25 | .DS_Store 26 | .ruff_cache -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [push] 2 | 3 | default_language_version: 4 | python: python3.11 5 | 6 | repos: 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | # Ruff version. 9 | rev: v0.0.278 10 | hooks: 11 | - id: ruff 12 | args: [--fix, --exit-non-zero-on-fix] 13 | 14 | - repo: local 15 | hooks: 16 | - id: ruff 17 | stages: [commit, push] 18 | name: ruff_format 19 | entry: poetry run ruff format 20 | language: system 21 | types: [python] 22 | - id: mypy 23 | stages: [commit, push] 24 | name: mypy 25 | entry: poetry run mypy --ignore-missing-imports 26 | language: system 27 | types: [python] 28 | 29 | - repo: https://github.com/pre-commit/pre-commit-hooks 30 | rev: v2.1.0 31 | hooks: 32 | - id: trailing-whitespace 33 | stages: [commit, push] 34 | - id: check-added-large-files 35 | - id: check-ast 36 | stages: [commit, push] 37 | - id: check-case-conflict 38 | - id: check-byte-order-marker 39 | - id: check-executables-have-shebangs 40 | - id: check-docstring-first 41 | stages: [commit, push] 42 | - id: check-json 43 | - id: check-merge-conflict 44 | stages: [commit, push] 45 | - id: check-symlinks 46 | - id: check-vcs-permalinks 47 | - id: check-xml 48 | - id: check-yaml 49 | - id: debug-statements 50 | - id: detect-private-key 51 | # - id: flake8 52 | # stages: [commit,push] 53 | - id: forbid-new-submodules 54 | - id: no-commit-to-branch 55 | stages: [commit, push] 56 | args: 57 | - --branch=main 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # dbt-loom
2 |
3 | [![pypi version shield](https://img.shields.io/pypi/v/dbt-loom)](https://img.shields.io/pypi/v/dbt-loom)
4 |
5 | dbt-loom is a dbt Core plugin that weaves together multi-project deployments. dbt-loom works by fetching public model definitions from your dbt artifacts, and injecting those models into your dbt project.
6 |
7 | ```mermaid
8 | flowchart LR
9 |
10 | classDef black fill:#f2f2ebff, stroke:#000, color:#000
11 | classDef background fill:#f2f2ebff, stroke:#000, color:#000
12 | classDef hidden fill:#BADC3F, stroke:#BADC3F, color:#BADC3F
13 |
14 | style TOP fill:#BADC3F, stroke:#000
15 |
16 | subgraph TOP[Your Infrastructure]
17 | direction TB
18 | dbt_runtime[dbt Core]:::background
19 | proprietary_plugin[Open Source Metadata Plugin]:::background
20 |
21 | files[Local and Remote Files]:::background
22 | object_storage[Object Storage]:::background
23 | discovery_api[dbt Cloud APIs]:::background
24 |
25 | discovery_api --> proprietary_plugin
26 | files --> proprietary_plugin
27 | object_storage --> proprietary_plugin
28 | proprietary_plugin --> dbt_runtime
29 | end
30 |
31 | Project:::black --> TOP --> Warehouse:::black
32 | ```
33 |
34 | dbt-loom currently supports obtaining model definitions from:
35 |
36 | - Local manifest files
37 | - Remote manifest files via http(s)
38 | - dbt Cloud
39 | - GCS
40 | - S3-compatible object storage services
41 | - Azure Storage
42 |
43 | ## Getting Started
44 |
45 | To begin, install the `dbt-loom` python package.
46 |
47 | ```console
48 | pip install dbt-loom
49 | ```
50 |
51 | Next, create a `dbt-loom` configuration file. This configuration file provides the paths for your
52 | upstream projects' manifest files.
53 |
54 | ```yaml
55 | manifests:
56 |   - name: project_name # This should match the project's real name
57 |     type: file
58 |     config:
59 |       # A path to your manifest. This can be either a local path, or a remote
60 |       # path accessible via http(s).
61 |       path: path/to/manifest.json
62 | ```
63 |
64 | By default, `dbt-loom` will look for `dbt_loom.config.yml` in your working directory. You can also set the
65 | `DBT_LOOM_CONFIG` environment variable.
66 |
67 | ## How does it work?
68 |
69 | As of dbt-core 1.6.0-b8, there exists a `dbtPlugin` class which defines functions that can
70 | be called by dbt-core's `PluginManager`. During different parts of the dbt-core lifecycle (such as graph linking and
71 | manifest writing), the `PluginManager` will be called and all plugins registered with the appropriate hook will be executed.
72 |
73 | dbt-loom implements a `get_nodes` hook, and uses a configuration file to parse manifests, identify public models, and
74 | inject those public models when called by `dbt-core`.
75 |
76 | ## Advanced Features
77 |
78 | ### Loading artifacts from remote sources
79 |
80 | `dbt-loom` supports automatically fetching manifest artifacts from a variety
81 | of remote sources.
82 |
83 | #### Using dbt Cloud as an artifact source
84 |
85 | You can use dbt-loom to fetch model definitions from dbt Cloud by setting up a `dbt_cloud` manifest in your `dbt-loom` config, and setting the `DBT_CLOUD_API_TOKEN` environment variable in your execution environment.
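For example, the token can be exported in your shell before invoking dbt. A minimal sketch; the token value shown here is a placeholder:

```console
export DBT_CLOUD_API_TOKEN="<your dbt Cloud service token>"
dbt build
```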
86 |
87 | ```yaml
88 | manifests:
89 |   - name: project_name
90 |     type: dbt_cloud
91 |     config:
92 |       account_id: <account_id>
93 |
94 |       # Job ID pertains to the job that you'd like to fetch artifacts from.
95 |       job_id: <job_id>
96 |
97 |       api_endpoint: <api_endpoint>
98 |       # dbt Cloud has multiple regions with different URLs. Update this to
99 |       # your appropriate dbt cloud endpoint.
100 |
101 |       step: <step>
102 |       # If your job generates multiple artifacts, you can set the step from
103 |       # which to fetch artifacts. Defaults to the last step.
104 | ```
105 |
106 | #### Using an S3-compatible object store as an artifact source
107 |
108 | You can use dbt-loom to fetch manifest files from S3-compatible object stores
109 | by setting up an `s3` manifest in your `dbt-loom` config. Note that this
110 | approach supports all standard boto3-compatible environment variables and authentication mechanisms; see the [boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#environment-variables) for more details.
111 |
112 | ```yaml
113 | manifests:
114 |   - name: project_name
115 |     type: s3
116 |     config:
117 |       bucket_name: <bucket_name>
118 |       # The name of the bucket where your manifest is stored.
119 |
120 |       object_name: <object_name>
121 |       # The object name of your manifest file.
122 | ```
123 |
124 | #### Using GCS as an artifact source
125 |
126 | You can use dbt-loom to fetch manifest files from Google Cloud Storage by setting up a `gcs` manifest in your `dbt-loom` config.
127 |
128 | ```yaml
129 | manifests:
130 |   - name: project_name
131 |     type: gcs
132 |     config:
133 |       project_id: <project_id>
134 |       # The alphanumeric ID of the GCP project that contains your target bucket.
135 |
136 |       bucket_name: <bucket_name>
137 |       # The name of the bucket where your manifest is stored.
138 |
139 |       object_name: <object_name>
140 |       # The object name of your manifest file.
141 |
142 |       credentials: <path/to/credentials.json>
143 |       # A path to the service account JSON to use. If not passed, falls back to the default credentials inferred from the environment.
144 | ```
145 |
146 | #### Using Azure Storage as an artifact source
147 |
148 | You can use dbt-loom to fetch manifest files from Azure Storage
149 | by setting up an `azure` manifest in your `dbt-loom` config. The `azure` type implements
150 | the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python)
151 | class, supporting all environment variables and authentication mechanisms.
152 | Alternatively, set the `AZURE_STORAGE_CONNECTION_STRING` environment variable to
153 | authenticate via a connection string.
154 |
155 | ```yaml
156 | manifests:
157 |   - name: project_name
158 |     type: azure
159 |     config:
160 |       account_name: <account_name> # The name of your Azure Storage account
161 |       container_name: <container_name> # The name of your Azure Storage container
162 |       object_name: <object_name> # The object name of your manifest file.
163 | ```
164 |
165 | #### Using Snowflake Stage as an artifact source
166 |
167 | You can use dbt-loom to fetch manifest files from a Snowflake stage by setting up a `snowflake` manifest in your `dbt-loom` config. Please note that this only
168 | works for dbt-core versions 1.8.0 and newer.
169 |
170 | ```yaml
171 | manifests:
172 |   - name: project_name
173 |     type: snowflake
174 |     config:
175 |       stage: stage_name # Stage name, can include Database/Schema
176 |       stage_path: path/to/dbt/manifest.json # Path to manifest file in the stage
177 | ```
178 |
179 | ### Using environment variables
180 |
181 | You can easily incorporate your own environment variables into the config file.
This allows for dynamic configuration values that can change based on the environment. To specify an environment variable in the `dbt-loom` config file, use one of the following formats:
182 |
183 | `${ENV_VAR}` or `$ENV_VAR`
184 |
185 | #### Example:
186 |
187 | ```yaml
188 | manifests:
189 |   - name: revenue
190 |     type: gcs
191 |     config:
192 |       project_id: ${GCP_PROJECT}
193 |       bucket_name: ${GCP_BUCKET}
194 |       object_name: ${MANIFEST_PATH}
195 | ```
196 |
197 | ### Gzipped files
198 |
199 | `dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed
200 | with `.gz`.
201 |
202 | ```yaml
203 | manifests:
204 |   - name: revenue
205 |     type: s3
206 |     config:
207 |       bucket_name: example_bucket_name
208 |       object_name: manifest.json.gz
209 | ```
210 |
211 | ### Exclude nested packages
212 |
213 | In some circumstances, like running `dbt-project-evaluator`, you may not want a
214 | given package in an upstream project to be imported into a downstream project.
215 | You can exclude a package's assets from injection by adding the package name to
216 | the `excluded_packages` list in the downstream project's dbt-loom config.
217 |
218 | ```yaml
219 | manifests:
220 |   - name: revenue
221 |     type: file
222 |     config:
223 |       path: ../revenue/target/manifest.json
224 |     excluded_packages:
225 |       # Provide the string name of the package to exclude during injection.
226 |       - dbt_project_evaluator
227 | ```
228 |
229 | ### Optional manifests
230 |
231 | If you want to allow a manifest reference to be missing (e.g. when the upstream manifest has not been generated yet), you can set `optional: true` for that manifest entry. When `optional` is true and the manifest file does not exist, dbt-loom will skip loading it without raising an error. If `optional` is false or omitted (the default), missing manifests will cause an error.
232 |
233 | ```yaml
234 | manifests:
235 |   - name: revenue
236 |     type: file
237 |     config:
238 |       path: ../revenue/target/manifest.json
239 |     optional: true # If the manifest file is missing, do not raise an error
240 | ```
241 |
242 | ## Known Caveats
243 |
244 | Cross-project dependencies are a relatively new development, and dbt-core plugins
245 | are still in beta. As such, there are a number of caveats to be aware of when using
246 | this tool.
247 |
248 | 1. dbt plugins are only supported in dbt-core version 1.6.0-b8 and newer. This means you must be using a dbt adapter
249 |    compatible with this version.
250 | 2. `PluginNodeArgs` are not fully-realized dbt `ManifestNode`s, so documentation generated by `dbt docs generate` may
251 |    be sparse when viewing injected models.
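As a closing illustration, a downstream model references an injected public model like any other cross-project `ref`. A minimal sketch, assuming a hypothetical upstream project named `revenue` that exposes a public `orders` model:

```sql
-- models/staging/stg_downstream_orders.sql
-- `revenue` is the upstream project's name; `orders` is one of its public models.
select * from {{ ref('revenue', 'orders') }}
```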
252 | -------------------------------------------------------------------------------- /dbt_loom/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import os 3 | import re 4 | from pathlib import Path 5 | from typing import Callable, Dict, Optional, Set 6 | 7 | import yaml 8 | from dbt.contracts.graph.node_args import ModelNodeArgs 9 | from dbt.contracts.graph.nodes import ModelNode 10 | 11 | from dbt.plugins.manager import dbt_hook, dbtPlugin 12 | from dbt.plugins.manifest import PluginNodes 13 | from dbt.config.project import VarProvider 14 | 15 | from dbt_loom.shims import is_invalid_private_ref, is_invalid_protected_ref 16 | 17 | try: 18 | from dbt.artifacts.resources.types import NodeType 19 | except ModuleNotFoundError: 20 | from dbt.node_types import NodeType # type: ignore 21 | 22 | 23 | from dbt_loom.config import dbtLoomConfig 24 | from dbt_loom.logging import fire_event 25 | from dbt_loom.manifests import ManifestLoader, ManifestNode 26 | 27 | import importlib.metadata 28 | 29 | 30 | @dataclass 31 | class LoomModelNodeArgs(ModelNodeArgs): 32 | """A dbt-loom extension of ModelNodeArgs to preserve resource types across lineages.""" 33 | 34 | resource_type: NodeType = NodeType.Model 35 | group: Optional[str] = None 36 | 37 | def __init__(self, **kwargs): 38 | super().__init__( 39 | **{ 40 | key: value 41 | for key, value in kwargs.items() 42 | if key not in ("resource_type", "group") 43 | } 44 | ) 45 | self.resource_type = kwargs.get("resource_type", NodeType.Model) 46 | self.group = kwargs.get("group") 47 | 48 | @property 49 | def unique_id(self) -> str: 50 | unique_id = f"{self.resource_type}.{self.package_name}.{self.name}" 51 | if self.version: 52 | unique_id = f"{unique_id}.v{self.version}" 53 | 54 | return unique_id 55 | 56 | 57 | def identify_node_subgraph(manifest) -> Dict[str, ManifestNode]: 58 | """ 59 | Identify all nodes that should be selected from the manifest, and return ManifestNodes. 60 | """ 61 | 62 | output = {} 63 | 64 | # We're going to temporarily allow all nodes here. 65 | for unique_id in manifest["nodes"].keys(): 66 | if unique_id.split(".")[0] in (NodeType.Test.value, NodeType.Macro.value): 67 | continue 68 | 69 | node = manifest.get("nodes", {}).get(unique_id) 70 | 71 | if not node: 72 | continue 73 | 74 | if node.get("access") is None: 75 | node["access"] = node.get("config", {}).get("access", "protected") 76 | 77 | # Versions may be floats or strings. Standardize on strings for compatibility. 
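        # (For example, a manifest value of `version: 2` becomes `"2"`; keeping
        # both fields as strings avoids type mismatches when comparing versions
        # across projects.)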
78 | for key in ("version", "latest_version"): 79 | if node.get(key): 80 | node[key] = str(node[key]) 81 | 82 | output[unique_id] = ManifestNode(**(node)) 83 | 84 | return output 85 | 86 | 87 | def convert_model_nodes_to_model_node_args( 88 | selected_nodes: Dict[str, ManifestNode], 89 | ) -> Dict[str, LoomModelNodeArgs]: 90 | """Generate a dictionary of ModelNodeArgs based on a dictionary of ModelNodes""" 91 | return { 92 | unique_id: LoomModelNodeArgs( 93 | schema=node.schema_name, 94 | identifier=node.identifier, 95 | **(node.dump()), 96 | ) 97 | for unique_id, node in selected_nodes.items() 98 | if node is not None 99 | } 100 | 101 | 102 | @dataclass 103 | class LoomRunnableConfig: 104 | """A shim class to allow is_invalid_*_ref functions to correctly handle access for loom-injected models.""" 105 | 106 | restrict_access: bool = True 107 | vars: VarProvider = VarProvider(vars={}) 108 | 109 | 110 | class dbtLoom(dbtPlugin): 111 | """ 112 | dbtLoom is a dbt plugin that loads manifest files, parses a DAG from the manifest, 113 | and injects public nodes from imported manifest. 114 | """ 115 | 116 | def __init__(self, project_name: str): 117 | # Log the version of dbt-loom being initialized 118 | fire_event( 119 | msg=f'Initializing dbt-loom={importlib.metadata.version("dbt-loom")}' 120 | ) 121 | 122 | configuration_path = Path( 123 | os.environ.get("DBT_LOOM_CONFIG", "dbt_loom.config.yml") 124 | ) 125 | 126 | self._manifest_loader = ManifestLoader() 127 | self.manifests: Dict[str, Dict] = {} 128 | 129 | self.config: Optional[dbtLoomConfig] = self.read_config(configuration_path) 130 | self.models: Dict[str, LoomModelNodeArgs] = {} 131 | 132 | self._patch_ref_protection() 133 | 134 | if not self.config or (self.config and not self.config.enable_telemetry): 135 | self._patch_plugin_telemetry() 136 | 137 | super().__init__(project_name) 138 | 139 | def _patch_ref_protection(self) -> None: 140 | """Patch out the ref protection functions for proper protections""" 141 | import dbt.contracts.graph.manifest 142 | 143 | fire_event( 144 | msg="dbt-loom: Patching ref protection methods to support dbt-loom dependencies." 145 | ) 146 | 147 | dbt.contracts.graph.manifest.Manifest.is_invalid_protected_ref = ( # type: ignore 148 | self.dependency_wrapper(is_invalid_protected_ref) 149 | ) 150 | dbt.contracts.graph.manifest.Manifest.is_invalid_private_ref = ( # type: ignore 151 | self.dependency_wrapper(is_invalid_private_ref) 152 | ) 153 | 154 | dbt.parser.manifest.ManifestLoader.check_valid_group_config_node = ( # type: ignore 155 | self.group_validation_wrapper( 156 | dbt.parser.manifest.ManifestLoader.check_valid_group_config_node # type: ignore 157 | ) 158 | ) 159 | 160 | dbt.contracts.graph.nodes.ModelNode.from_args = ( # type: ignore 161 | self.model_node_wrapper(dbt.contracts.graph.nodes.ModelNode.from_args) # type: ignore 162 | ) 163 | 164 | def _patch_plugin_telemetry(self) -> None: 165 | """Patch the plugin telemetry function to prevent tracking of dbt plugins.""" 166 | import dbt.tracking 167 | 168 | dbt.tracking.track = self.tracking_wrapper(dbt.tracking.track) 169 | 170 | def tracking_wrapper(self, function) -> Callable: 171 | """Wrap the telemetry `track` function and return early if we're tracking plugin actions.""" 172 | 173 | def outer_function(*args, **kwargs): 174 | """Check the context of the snowplow tracker message for references to loom. 
Return if present.""" 175 | 176 | if any( 177 | [ 178 | self.__class__.__name__ in str(context_item.__dict__) 179 | or "dbt-loom" in str(context_item.__dict__) 180 | or "dbt_loom" in str(context_item.__dict__) 181 | for context_item in kwargs.get("context", []) 182 | ] 183 | ): 184 | return 185 | 186 | return function(*args, **kwargs) 187 | 188 | return outer_function 189 | 190 | def model_node_wrapper(self, function) -> Callable: 191 | """Wrap the ModelNode.from_args function and inject extra properties from the LoomModelNodeArgs.""" 192 | 193 | def outer_function(args: LoomModelNodeArgs) -> ModelNode: 194 | model = function(args) 195 | model.group = args.group 196 | return model 197 | 198 | return outer_function 199 | 200 | def group_validation_wrapper(self, function) -> Callable: 201 | """Wrap the check_valid_group_config_node function to inject upstream group names.""" 202 | 203 | def outer_function( 204 | inner_self, groupable_node, valid_group_names: Set[str] 205 | ) -> bool: 206 | new_groups: Set[str] = { 207 | model.group for model in self.models.values() if model.group is not None 208 | } 209 | 210 | return function( 211 | inner_self, groupable_node, valid_group_names.union(new_groups) 212 | ) 213 | 214 | return outer_function 215 | 216 | def dependency_wrapper(self, function) -> Callable: 217 | def outer_function(inner_self, node, target_model, dependencies) -> bool: 218 | if self.config is not None: 219 | for manifest_name in self.manifests.keys(): 220 | if manifest_name in dependencies: 221 | continue 222 | 223 | dependencies[manifest_name] = LoomRunnableConfig() 224 | 225 | return function(inner_self, node, target_model, dependencies) 226 | 227 | return outer_function 228 | 229 | def get_groups(self) -> Set[str]: 230 | """Get all groups defined in injected models.""" 231 | 232 | return { 233 | model.group for model in self.models.values() if model.group is not None 234 | } 235 | 236 | def read_config(self, path: Path) -> Optional[dbtLoomConfig]: 237 | """Read the dbt-loom configuration file.""" 238 | if not path.exists(): 239 | fire_event( 240 | msg=f"dbt-loom: Config file `{path}` does not exist" 241 | ) 242 | return None 243 | 244 | with open(path) as file: 245 | config_content = file.read() 246 | 247 | config_content = self.replace_env_variables(config_content) 248 | 249 | return dbtLoomConfig(**yaml.load(config_content, yaml.SafeLoader)) 250 | 251 | @staticmethod 252 | def replace_env_variables(config_str: str) -> str: 253 | """Replace environment variable placeholders in the configuration string.""" 254 | pattern = r"\$(\w+)|\$\{([^}]+)\}" 255 | return re.sub( 256 | pattern, 257 | lambda match: os.environ.get( 258 | match.group(1) if match.group(1) is not None else match.group(2), "" 259 | ), 260 | config_str, 261 | ) 262 | 263 | def initialize(self) -> None: 264 | """Initialize the plugin""" 265 | 266 | if self.models != {} or not self.config: 267 | return 268 | 269 | for manifest_reference in self.config.manifests: 270 | fire_event( 271 | msg=f"dbt-loom: Loading manifest for `{manifest_reference.name}`" 272 | f" from `{manifest_reference.type.value}`" 273 | ) 274 | 275 | manifest = self._manifest_loader.load(manifest_reference) 276 | if manifest is None: 277 | continue 278 | 279 | # Find the official project name from the manifest metadata and use that as the manifests key. 
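            # (Falls back to the user-supplied reference name when the manifest
            # metadata does not include a project name.)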
280 |             manifest_name = manifest.get("metadata", {}).get(
281 |                 "project_name", manifest_reference.name
282 |             )
283 |             self.manifests[manifest_name] = manifest
284 |
285 |             selected_nodes = identify_node_subgraph(manifest)
286 |
287 |             # Remove nodes from excluded packages.
288 |             filtered_nodes = {
289 |                 key: value
290 |                 for key, value in selected_nodes.items()
291 |                 if value.package_name not in manifest_reference.excluded_packages
292 |             }
293 |
294 |             loom_nodes = convert_model_nodes_to_model_node_args(filtered_nodes)
295 |
296 |             self.models.update(loom_nodes)
297 |
298 |     @dbt_hook
299 |     def get_nodes(self) -> PluginNodes:
300 |         """
301 |         Inject PluginNodes to dbt for injection into dbt's DAG.
302 |         """
303 |         fire_event(msg="dbt-loom: Injecting nodes")
304 |         return PluginNodes(models=self.models)  # type: ignore
305 |
306 |
307 | plugins = [dbtLoom]
308 |
-------------------------------------------------------------------------------- /dbt_loom/clients/az_blob.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import gzip
4 | from io import BytesIO
5 | from typing import Dict
6 |
7 | from azure.identity import DefaultAzureCredential
8 | from azure.storage.blob import BlobServiceClient
9 | from pydantic import BaseModel
10 |
11 |
12 | class AzureReferenceConfig(BaseModel):
13 |     """Configuration for a reference stored in Azure Storage"""
14 |
15 |     container_name: str
16 |     object_name: str
17 |     account_name: str
18 |
19 |
20 | class AzureClient:
21 |     """A client for loading manifest files from Azure storage."""
22 |
23 |     def __init__(
24 |         self, container_name: str, object_name: str, account_name: str
25 |     ) -> None:
26 |         self.account_name = account_name
27 |         self.container_name = container_name
28 |         self.object_name = object_name
29 |
30 |     def load_manifest(self) -> Dict:
31 |         """Load the manifest.json file from Azure storage."""
32 |
33 |         connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
34 |         try:
35 |             if connection_string:
36 |                 blob_service_client = BlobServiceClient.from_connection_string(
37 |                     connection_string
38 |                 )
39 |             else:
40 |                 account_url = f"https://{self.account_name}.blob.core.windows.net"
41 |                 blob_service_client = BlobServiceClient(
42 |                     account_url, credential=DefaultAzureCredential()
43 |                 )
44 |             blob_client = blob_service_client.get_blob_client(
45 |                 container=self.container_name, blob=self.object_name
46 |             )
47 |         except Exception as e:
48 |             raise Exception(
49 |                 "Unable to connect to Azure. Please confirm your credentials, connection details, and network."
50 |             ) from e
51 |
52 |         # Deserialize the body of the object.
53 |         try:
54 |             if self.object_name.endswith('.gz'):
55 |                 with gzip.GzipFile(fileobj=BytesIO(blob_client.download_blob().readall())) as gzipfile:
56 |                     content = gzipfile.read().decode('utf-8')
57 |             else:
58 |                 content = blob_client.download_blob(encoding="utf-8").readall()
59 |         except Exception:
60 |             raise Exception(
61 |                 f"Unable to read the data contained in the object `{self.object_name}`."
62 |             )
63 |
64 |         try:
65 |             return json.loads(content)
66 |         except Exception:
67 |             raise Exception(
68 |                 f"The object `{self.object_name}` does not contain valid JSON."
69 | ) 70 | -------------------------------------------------------------------------------- /dbt_loom/clients/dbt_cloud.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Dict, Optional 3 | 4 | from pydantic import BaseModel 5 | import requests 6 | 7 | from dbt_loom.logging import fire_event 8 | 9 | 10 | class DbtCloudReferenceConfig(BaseModel): 11 | """Configuration for a dbt Cloud reference.""" 12 | 13 | account_id: int 14 | job_id: int 15 | api_endpoint: Optional[str] = None 16 | step: Optional[int] = None 17 | 18 | 19 | class DbtCloud: 20 | """API Client for dbt Cloud. Fetches latest manifest for a given dbt job.""" 21 | 22 | def __init__( 23 | self, 24 | account_id: int, 25 | token: Optional[str] = None, 26 | api_endpoint: Optional[str] = None, 27 | ) -> None: 28 | resolved_token = token or os.environ.get("DBT_CLOUD_API_TOKEN") 29 | if resolved_token is None: 30 | raise Exception( 31 | "A DBT Cloud token must be provided to dbt-loom when fetching manifest " 32 | "data from dbt Cloud. Please provide one via the `DBT_CLOUD_API_TOKEN` " 33 | "environment variable." 34 | ) 35 | 36 | self.__token: str = resolved_token 37 | 38 | self.account_id = account_id 39 | self.api_endpoint = api_endpoint or "https://cloud.getdbt.com/api/v2" 40 | 41 | def _query(self, endpoint: str, **kwargs) -> Dict: 42 | """Query the dbt Cloud Administrative API.""" 43 | url = f"{self.api_endpoint}/{endpoint}" 44 | fire_event(msg=f"Querying {url}") 45 | response = requests.get( 46 | url, 47 | headers={ 48 | "authorization": "Bearer " + self.__token, 49 | "content-type": "application/json", 50 | }, 51 | **kwargs, 52 | ) 53 | return response.json() 54 | 55 | def _get_manifest(self, run_id: int, step: Optional[int] = None) -> Dict[str, Any]: 56 | """Get the manifest json for a given dbt Cloud run.""" 57 | params = {} 58 | if step: 59 | params["step"] = step 60 | 61 | return self._query( 62 | f"accounts/{self.account_id}/runs/{run_id}/artifacts/manifest.json", 63 | params=params, 64 | ) 65 | 66 | def _get_latest_run(self, job_id: int) -> Dict[str, Any]: 67 | """Get the latest run performed by a dbt Cloud job.""" 68 | return self._query( 69 | f"accounts/{self.account_id}/runs/", 70 | params={ 71 | "job_definition_id": job_id, 72 | "status": 10, 73 | "order_by": "-finished_at", 74 | "limit": 1, 75 | }, 76 | )["data"][0] 77 | 78 | def get_models(self, job_id: int, step: Optional[int] = None) -> Dict[str, Any]: 79 | """Get the latest state of all models by Job ID.""" 80 | latest_run = self._get_latest_run(job_id=job_id) 81 | return self._get_manifest(run_id=latest_run["id"], step=step) 82 | -------------------------------------------------------------------------------- /dbt_loom/clients/gcs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import gzip 3 | from io import BytesIO 4 | from pathlib import Path 5 | from typing import Dict, Optional 6 | 7 | from google.cloud import storage 8 | from pydantic import BaseModel 9 | 10 | 11 | class GCSReferenceConfig(BaseModel): 12 | """Configuration for a GCS reference""" 13 | 14 | project_id: str 15 | bucket_name: str 16 | object_name: str 17 | credentials: Optional[Path] = None 18 | 19 | 20 | class GCSClient: 21 | """Client for GCS. 
Fetches manifest for a given bucket."""
22 |
23 |     def __init__(
24 |         self,
25 |         project_id: str,
26 |         bucket_name: str,
27 |         object_name: str,
28 |         credentials: Optional[Path] = None,
29 |     ) -> None:
30 |         self.project_id = project_id
31 |         self.bucket_name = bucket_name
32 |         self.object_name = object_name
33 |         self.credentials = credentials
34 |
35 |     def load_manifest(self) -> Dict:
36 |         """Load a manifest json from a GCS bucket."""
37 |         client = (
38 |             storage.Client.from_service_account_json(
39 |                 self.credentials, project=self.project_id
40 |             )
41 |             if self.credentials
42 |             else storage.Client(project=self.project_id)
43 |         )
44 |         bucket = client.get_bucket(self.bucket_name)
45 |         blob = bucket.get_blob(self.object_name)
46 |         if not blob:
47 |             raise Exception(
48 |                 f"The object `{self.object_name}` does not exist in bucket "
49 |                 f"`{self.bucket_name}`."
50 |             )
51 |
52 |         if self.object_name.endswith('.gz'):
53 |             compressed_manifest = blob.download_as_bytes()
54 |             with gzip.GzipFile(fileobj=BytesIO(compressed_manifest)) as gzip_file:
55 |                 manifest_json = gzip_file.read()
56 |         else:
57 |             manifest_json = blob.download_as_text()
58 |
59 |         try:
60 |             return json.loads(manifest_json)
61 |         except Exception:
62 |             raise Exception(
63 |                 f"The object `{self.object_name}` does not contain valid JSON."
64 |             )
65 |
-------------------------------------------------------------------------------- /dbt_loom/clients/s3.py: --------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Dict, Optional
4 |
5 | import boto3
6 | import gzip
7 | from io import BytesIO
8 | from pydantic import BaseModel
9 |
10 |
11 | class S3ReferenceConfig(BaseModel):
12 |     """Configuration for a reference stored in S3"""
13 |
14 |     bucket_name: str
15 |     object_name: str
16 |     credentials: Optional[Path] = None
17 |
18 |
19 | class S3Client:
20 |     """A client for loading manifest files from S3-compatible object stores."""
21 |
22 |     def __init__(self, bucket_name: str, object_name: str) -> None:
23 |         self.bucket_name = bucket_name
24 |         self.object_name = object_name
25 |
26 |     def load_manifest(self) -> Dict:
27 |         """Load the manifest.json file from an S3 bucket."""
28 |
29 |         client = boto3.client("s3")
30 |
31 |         # TODO: Determine if I need to add args for SSE
32 |         try:
33 |             response = client.get_object(Bucket=self.bucket_name, Key=self.object_name)
34 |         except client.exceptions.NoSuchBucket:
35 |             raise Exception(f"The bucket `{self.bucket_name}` does not exist.")
36 |         except client.exceptions.NoSuchKey:
37 |             raise Exception(
38 |                 f"The object `{self.object_name}` does not exist in bucket "
39 |                 f"`{self.bucket_name}`."
40 |             )
41 |
42 |         # Deserialize the body of the object.
43 |         try:
44 |             if self.object_name.endswith(".gz"):
45 |                 body = response["Body"].read()
46 |                 with gzip.GzipFile(fileobj=BytesIO(body)) as gzipfile:
47 |                     content = gzipfile.read().decode('utf-8')
48 |             else:
49 |                 content = response["Body"].read().decode("utf-8")
50 |         except Exception:
51 |             raise Exception(
52 |                 f"Unable to read the data contained in the object `{self.object_name}`."
53 |             )
54 |
55 |         try:
56 |             return json.loads(content)
57 |         except Exception:
58 |             raise Exception(
59 |                 f"The object `{self.object_name}` does not contain valid JSON."
60 |             )
61 |
-------------------------------------------------------------------------------- /dbt_loom/clients/snowflake_stage.py: --------------------------------------------------------------------------------
1 | import gzip
2 | import json
3 | import tempfile
4 | from pathlib import Path, PurePosixPath
5 | from typing import Dict
6 |
7 | from dbt.config.runtime import load_profile
8 | from dbt.flags import get_flags
9 | from dbt_loom.logging import fire_event
10 | from pydantic import BaseModel
11 |
12 |
13 | class SnowflakeReferenceConfig(BaseModel):
14 |     """Configuration for a reference stored in a Snowflake stage"""
15 |
16 |     stage: str
17 |     stage_path: str
18 |
19 |
20 | class SnowflakeClient:
21 |     """A client for loading manifest files from a Snowflake stage."""
22 |
23 |     def __init__(self, stage: str, stage_path: str) -> None:
24 |         self.stage = stage
25 |         self.stage_path = stage_path.lstrip("/")
26 |
27 |     def load_manifest(self) -> Dict:
28 |         """Load the manifest.json file from a Snowflake stage."""
29 |
30 |         try:
31 |             from dbt.adapters.snowflake import SnowflakeAdapter
32 |         except ImportError as exception:
33 |             fire_event(
34 |                 msg="dbt-loom: Fatal error. Expected to find dbt-snowflake "
35 |                 "installed to support loading the manifest from a Snowflake "
36 |                 "stage.",
37 |             )
38 |             raise exception
39 |
40 |         try:
41 |             from dbt.mp_context import get_mp_context
42 |         except ImportError as exception:
43 |             fire_event(
44 |                 msg="dbt-loom: Fatal error. Unable to initialize a Snowflake "
45 |                 "adapter. Loading from Snowflake stages requires dbt-core "
46 |                 "1.8.0 and newer."
47 |             )
48 |             raise exception
49 |
50 |         flags = get_flags()
51 |         profile = load_profile(
52 |             project_root=flags.PROJECT_DIR,
53 |             cli_vars=flags.VARS,
54 |             profile_name_override=flags.PROFILE,
55 |             target_override=flags.TARGET,
56 |         )
57 |         adapter = SnowflakeAdapter(profile, get_mp_context())
58 |         file_name = str(PurePosixPath(self.stage_path).name)
59 |         tmp_dir = tempfile.mkdtemp(prefix="dbt_loom_")
60 |
61 |         # Snowflake needs '/' path separators
62 |         tmp_dir_sf = tmp_dir.replace("\\", "/")
63 |
64 |         with adapter.connection_named("dbt-loom"):
65 |             get_query = f"get @{self.stage}/{self.stage_path} file://{tmp_dir_sf}/"
66 |             response, _table = adapter.connections.execute(get_query)
67 |             if response.rows_affected == 0:
68 |                 raise Exception(
69 |                     f"Failed to get file {self.stage}/{self.stage_path}: {response}"
70 |                 )
71 |
72 |         download_path = Path(tmp_dir) / file_name
73 |
74 |         if download_path.name.endswith(".gz"):
75 |             with gzip.GzipFile(download_path) as gzip_file:
76 |                 content = gzip_file.read().decode("utf-8")
77 |         else:
78 |             with download_path.open("r") as f:
79 |                 content = f.read()
80 |
81 |         return json.loads(content)
-------------------------------------------------------------------------------- /dbt_loom/config.py: --------------------------------------------------------------------------------
1 | from enum import Enum
2 | from pathlib import Path
3 | import re
4 | from typing import List, Union
5 | from urllib.parse import ParseResult, urlparse
6 |
7 | from pydantic import BaseModel, Field, validator
8 |
9 | from dbt_loom.clients.az_blob import AzureReferenceConfig
10 | from dbt_loom.clients.dbt_cloud import DbtCloudReferenceConfig
11 | from dbt_loom.clients.gcs import GCSReferenceConfig
12 | from dbt_loom.clients.s3 import S3ReferenceConfig
13 | from dbt_loom.clients.snowflake_stage import SnowflakeReferenceConfig
14 |
15 |
16 | class ManifestReferenceType(str, Enum):
17 |     """Type of ManifestReference"""
18 |
19 |     file = "file"
20 |     dbt_cloud = "dbt_cloud"
21 |     gcs = "gcs"
22 |     s3 = "s3"
23 |     azure = "azure"
24 |     snowflake = "snowflake"
25 |
26 |
27 | class FileReferenceConfig(BaseModel):
28 |     """Configuration for a file reference"""
29 |
30 |     path: ParseResult
31 |
32 |     @validator("path", pre=True, always=True)
33 |     def default_path(cls, v, values) -> ParseResult:
34 |         """
35 |         Check if the provided path is a valid URL. If not, convert it into an
36 |         absolute file path.
37 |         """
38 |
39 |         if isinstance(v, ParseResult):
40 |             return v
41 |
42 |         if bool(re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", v)):
43 |             return urlparse(v)
44 |
45 |         return urlparse(Path(v).absolute().as_uri())
46 |
47 |
48 | class ManifestReference(BaseModel):
49 |     """Reference information for a manifest to be loaded into dbt-loom."""
50 |
51 |     name: str
52 |     type: ManifestReferenceType
53 |     config: Union[
54 |         FileReferenceConfig,
55 |         DbtCloudReferenceConfig,
56 |         GCSReferenceConfig,
57 |         S3ReferenceConfig,
58 |         AzureReferenceConfig,
59 |         SnowflakeReferenceConfig,
60 |     ]
61 |     excluded_packages: List[str] = Field(default_factory=list)
62 |     optional: bool = False
63 |
64 |
65 | class dbtLoomConfig(BaseModel):
66 |     """Configuration for dbt Loom"""
67 |
68 |     manifests: List[ManifestReference]
69 |     enable_telemetry: bool = False
70 |
71 |
72 | class LoomConfigurationError(Exception):
73 |     """Error raised when dbt-loom has been misconfigured."""
-------------------------------------------------------------------------------- /dbt_loom/logging.py: --------------------------------------------------------------------------------
1 | try:
2 |     import dbt_common.events.functions as dbt_event_function
3 |     from dbt_common.events.types import Note
4 | except ModuleNotFoundError:
5 |     import dbt.events.functions as dbt_event_function  # type: ignore
6 |     from dbt.events.types import Note  # type: ignore
7 |
8 |
9 | def fire_event(*args, **kwargs) -> None:
10 |     """Fire a dbt-core event."""
11 |     dbt_event_function.fire_event(Note(*args, **kwargs))
-------------------------------------------------------------------------------- /dbt_loom/manifests.py: --------------------------------------------------------------------------------
1 | import datetime
2 | from io import BytesIO
3 | import json
4 | import gzip
5 | import os
6 | from pathlib import Path
7 | from typing import Dict, List, Optional
8 | from urllib.parse import unquote, urlunparse
9 |
10 | from pydantic import BaseModel, Field, validator
11 | import requests
12 |
13 | from dbt_loom.clients.snowflake_stage import SnowflakeReferenceConfig, SnowflakeClient
14 |
15 | try:
16 |     from dbt.artifacts.resources.types import NodeType
17 | except ModuleNotFoundError:
18 |     from dbt.node_types import NodeType  # type: ignore
19 |
20 | from dbt_loom.clients.az_blob import AzureClient, AzureReferenceConfig
21 | from dbt_loom.clients.dbt_cloud import DbtCloud, DbtCloudReferenceConfig
22 | from dbt_loom.clients.gcs import GCSClient, GCSReferenceConfig
23 | from dbt_loom.clients.s3 import S3Client, S3ReferenceConfig
24 | from dbt_loom.config import (
25 |     FileReferenceConfig,
26 |     LoomConfigurationError,
27 |     ManifestReference,
28 |     ManifestReferenceType,
29 | )
30 |
31 |
32 | class DependsOn(BaseModel):
33 |     """Wrapper for storing dependencies"""
34 |
35 |     nodes: List[str] = Field(default_factory=list)
36 |     macros: List[str] = Field(default_factory=list)
37 |
38 |
39 | class ManifestNode(BaseModel, use_enum_values=True):
40 |     """A basic ManifestNode that can be referenced across projects."""
41 |
42 |     name: str
43 |     package_name: str
44 |     unique_id: str
45 |     resource_type: NodeType
46 |     schema_name: str = Field(alias="schema")
47 |     database: Optional[str] = None
48 |     relation_name: Optional[str] = None
49 |     version: Optional[str] = None
50 |     latest_version: Optional[str] = None
51 |     deprecation_date: Optional[datetime.datetime] = None
52 |     access: Optional[str] = "protected"
53 |     group: Optional[str] = None
54 |     generated_at: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
55 |     depends_on: Optional[DependsOn] = None
56 |     depends_on_nodes: List[str] = Field(default_factory=list)
57 |     enabled: bool = True
58 |
59 |     @validator("depends_on_nodes", always=True)
60 |     def default_depends_on_nodes(cls, v, values):
61 |         depends_on = values.get("depends_on")
62 |         if depends_on is None:
63 |             return []
64 |
65 |         return [
66 |             node for node in depends_on.nodes if node.split(".")[0] not in ("source",)
67 |         ]
68 |
69 |     @validator("resource_type", always=True)
70 |     def fix_resource_types(cls, v, values):
71 |         """If the resource type does not match the unique_id prefix, then rewrite the resource type."""
72 |
73 |         node_type = values.get("unique_id").split(".")[0]
74 |         if v != node_type:
75 |             return node_type
76 |         return v
77 |
78 |     @property
79 |     def identifier(self) -> str:
80 |         if not self.relation_name:
81 |             return self.name
82 |
83 |         return self.relation_name.split(".")[-1].replace('"', "").replace("`", "")
84 |
85 |     def dump(self) -> Dict:
86 |         """Dump the ManifestNode to a Dict, with support for pydantic 1 and 2"""
87 |         exclude_set = {"schema_name", "depends_on", "node_config", "unique_id"}
88 |         if hasattr(self, "model_dump"):
89 |             return self.model_dump(exclude=exclude_set)  # type: ignore
90 |
91 |         return self.dict(exclude=exclude_set)
92 |
93 |
94 | class UnknownManifestPathType(Exception):
95 |     """Raised when the ManifestLoader receives a FileReferenceConfig with a path that does not have a known URL scheme."""
96 |
97 |
98 | class InvalidManifestPath(Exception):
99 |     """Raised when the ManifestLoader receives a FileReferenceConfig with an invalid path."""
100 |
101 |
102 | class ManifestLoader:
103 |     def __init__(self):
104 |         self.loading_functions = {
105 |             ManifestReferenceType.file: self.load_from_path,
106 |             ManifestReferenceType.dbt_cloud: self.load_from_dbt_cloud,
107 |             ManifestReferenceType.gcs: self.load_from_gcs,
108 |             ManifestReferenceType.s3: self.load_from_s3,
109 |             ManifestReferenceType.azure: self.load_from_azure,
110 |             ManifestReferenceType.snowflake: self.load_from_snowflake,
111 |         }
112 |
113 |     @staticmethod
114 |     def load_from_path(config: FileReferenceConfig) -> Dict:
115 |         """
116 |         Load a manifest dictionary based on a FileReferenceConfig. This config's
117 |         path can point to either a local file or a URL to a remote location.
118 | """ 119 | 120 | if config.path.scheme in ("http", "https"): 121 | return ManifestLoader.load_from_http(config) 122 | 123 | if config.path.scheme in ("file"): 124 | return ManifestLoader.load_from_local_filesystem(config) 125 | 126 | raise UnknownManifestPathType() 127 | 128 | @staticmethod 129 | def load_from_local_filesystem(config: FileReferenceConfig) -> Dict: 130 | """Load a manifest dictionary from a local file""" 131 | 132 | if not config.path.path: 133 | raise InvalidManifestPath() 134 | 135 | if config.path.netloc: 136 | file_path = Path(f"//{config.path.netloc}{config.path.path}") 137 | else: 138 | file_path = Path( 139 | unquote( 140 | config.path.path.lstrip("/") 141 | if os.name == "nt" 142 | else config.path.path 143 | ) 144 | ) 145 | 146 | if not file_path.exists(): 147 | raise LoomConfigurationError(f"The path `{file_path}` does not exist.") 148 | 149 | if file_path.suffix == ".gz": 150 | with gzip.open(file_path, "rt") as file: 151 | return json.load(file) 152 | 153 | return json.load(open(file_path)) 154 | 155 | @staticmethod 156 | def load_from_http(config: FileReferenceConfig) -> Dict: 157 | """Load a manifest dictionary from a local file""" 158 | 159 | if not config.path.path: 160 | raise InvalidManifestPath() 161 | 162 | response = requests.get(urlunparse(config.path), stream=True) 163 | response.raise_for_status() # Check for request errors 164 | 165 | # Check for compression on the file. If compressed, store it in a buffer 166 | # and decompress it. 167 | if ( 168 | config.path.path.endswith(".gz") 169 | or response.headers.get("Content-Encoding") == "gzip" 170 | ): 171 | with gzip.GzipFile(fileobj=BytesIO(response.content)) as gz_file: 172 | return json.load(gz_file) 173 | 174 | return response.json() 175 | 176 | @staticmethod 177 | def load_from_dbt_cloud(config: DbtCloudReferenceConfig) -> Dict: 178 | """Load a manifest dictionary from dbt Cloud.""" 179 | client = DbtCloud( 180 | account_id=config.account_id, api_endpoint=config.api_endpoint 181 | ) 182 | 183 | return client.get_models(config.job_id, step=config.step) 184 | 185 | @staticmethod 186 | def load_from_gcs(config: GCSReferenceConfig) -> Dict: 187 | """Load a manifest dictionary from a GCS bucket.""" 188 | gcs_client = GCSClient( 189 | project_id=config.project_id, 190 | bucket_name=config.bucket_name, 191 | object_name=config.object_name, 192 | credentials=config.credentials, 193 | ) 194 | 195 | return gcs_client.load_manifest() 196 | 197 | @staticmethod 198 | def load_from_s3(config: S3ReferenceConfig) -> Dict: 199 | """Load a manifest dictionary from an S3-compatible bucket.""" 200 | gcs_client = S3Client( 201 | bucket_name=config.bucket_name, 202 | object_name=config.object_name, 203 | ) 204 | 205 | return gcs_client.load_manifest() 206 | 207 | @staticmethod 208 | def load_from_azure(config: AzureReferenceConfig) -> Dict: 209 | """Load a manifest dictionary from Azure storage.""" 210 | azure_client = AzureClient( 211 | container_name=config.container_name, 212 | object_name=config.object_name, 213 | account_name=config.account_name, 214 | ) 215 | 216 | return azure_client.load_manifest() 217 | 218 | @staticmethod 219 | def load_from_snowflake(config: SnowflakeReferenceConfig) -> Dict: 220 | """Load a manifest dictionary from Snowflake stage.""" 221 | snowflake_client = SnowflakeClient( 222 | stage=config.stage, stage_path=config.stage_path 223 | ) 224 | 225 | return snowflake_client.load_manifest() 226 | 227 | def load(self, manifest_reference: ManifestReference) -> Dict: 228 | """Load a 
manifest dictionary based on a ManifestReference input."""
229 |
230 |         if manifest_reference.type not in self.loading_functions:
231 |             raise LoomConfigurationError(
232 |                 f"The manifest reference provided for {manifest_reference.name} does "
233 |                 "not have a valid type."
234 |             )
235 |
236 |         try:
237 |             manifest = self.loading_functions[manifest_reference.type](
238 |                 manifest_reference.config
239 |             )
240 |         except LoomConfigurationError:
241 |             if getattr(manifest_reference, "optional", False):
242 |                 return None
243 |             raise
244 |
245 |         return manifest
-------------------------------------------------------------------------------- /dbt_loom/shims.py: --------------------------------------------------------------------------------
1 | from typing import Mapping, Optional
2 | from dbt.contracts.graph.nodes import GraphMemberNode, ModelNode
3 | from dbt.contracts.graph.manifest import MaybeNonSource
4 |
5 | try:
6 |     from dbt.artifacts.resources.types import NodeType, AccessType
7 | except ModuleNotFoundError:
8 |     from dbt.node_types import NodeType, AccessType  # type: ignore
9 |
10 |
11 | def is_invalid_protected_ref(
12 |     self,
13 |     node: GraphMemberNode,
14 |     target_model: MaybeNonSource,
15 |     dependencies: Optional[Mapping],
16 | ) -> bool:
17 |     dependencies = dependencies or {}
18 |     if not isinstance(target_model, ModelNode):
19 |         return False
20 |
21 |     is_protected_ref = (
22 |         target_model.access == AccessType.Protected
23 |         # don't raise this reference error for ad hoc 'preview' queries
24 |         and node.resource_type != NodeType.SqlOperation
25 |         and node.resource_type != NodeType.RPCCall  # TODO: rm
26 |     )
27 |     target_dependency = dependencies.get(target_model.package_name)
28 |     restrict_package_access = (
29 |         target_dependency.restrict_access if target_dependency else False
30 |     )
31 |
32 |     return is_protected_ref and (
33 |         node.package_name != target_model.package_name and restrict_package_access
34 |     )
35 |
36 |
37 | def is_invalid_private_ref(
38 |     self,
39 |     node: GraphMemberNode,
40 |     target_model: MaybeNonSource,
41 |     dependencies: Optional[Mapping],
42 | ) -> bool:
43 |     dependencies = dependencies or {}
44 |     if not isinstance(target_model, ModelNode):
45 |         return False
46 |
47 |     is_private_ref = (
48 |         target_model.access == AccessType.Private
49 |         # don't raise this reference error for ad hoc 'preview' queries
50 |         and node.resource_type != NodeType.SqlOperation
51 |         and node.resource_type != NodeType.RPCCall  # TODO: rm
52 |     )
53 |     target_dependency = dependencies.get(target_model.package_name)
54 |     restrict_package_access = (
55 |         target_dependency.restrict_access if target_dependency else False
56 |     )
57 |
58 |     return is_private_ref and (
59 |         # Invalid reference because the group does not match
60 |         (hasattr(node, "group") and node.group and node.group != target_model.group)  # type: ignore
61 |         # Or, invalid because these are different namespaces (project/package) and restrict-access is enforced
62 |         or (node.package_name != target_model.package_name and restrict_package_access)
63 |     )
-------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: --------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | The dbt-loom community exists to provide a free, unencumbered, and
6 | vendor-agnostic means of enabling multi-project deployments of dbt-core.
-------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: --------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | The dbt-loom community exists to provide a free, unencumbered, and
6 | vendor-agnostic means of enabling multi-project deployments of dbt-core.
7 | We accept that for this project to be successful, we must create a
8 | community that is open, curious, and accepting of each other's differences.
9 | 
10 | We as members, contributors, and leaders pledge to make participation in our
11 | community a harassment-free experience for everyone, regardless of age, body
12 | size, visible or invisible disability, ethnicity, sex characteristics, gender
13 | identity and expression, level of experience, education, socio-economic status,
14 | nationality, personal appearance, race, religion, sexual identity
15 | and orientation, or employer.
16 | 
17 | We pledge to act and interact in ways that contribute to an open, welcoming,
18 | diverse, inclusive, and healthy community.
19 | 
20 | ## Our Standards
21 | 
22 | Examples of behavior that contributes to a positive environment for our
23 | community include:
24 | 
25 | - Demonstrating empathy and kindness toward other people
26 | - Being respectful of differing opinions, viewpoints, and experiences
27 | - Giving and gracefully accepting constructive feedback
28 | - Accepting responsibility and apologizing to those affected by our mistakes,
29 |   and learning from the experience
30 | - Focusing on what is best not just for us as individuals or for specific
31 |   corporate interests, but for the overall community
32 | 
33 | Examples of unacceptable behavior include:
34 | 
35 | - The use of sexualized language or imagery, and sexual attention or
36 |   advances of any kind
37 | - Trolling, insulting or derogatory comments, and personal or political attacks
38 | - Public or private harassment
39 | - Publishing others' private information, such as a physical or email
40 |   address, without their explicit permission
41 | - Other conduct which could reasonably be considered inappropriate in a
42 |   professional setting
43 | 
44 | ## Enforcement Responsibilities
45 | 
46 | Community leaders are responsible for clarifying and enforcing our standards of
47 | acceptable behavior and will take appropriate and fair corrective action in
48 | response to any behavior that they deem inappropriate, threatening, offensive,
49 | or harmful.
50 | 
51 | Community leaders have the right and responsibility to remove, edit, or reject
52 | comments, commits, code, wiki edits, issues, and other contributions that are
53 | not aligned to this Code of Conduct, and will communicate reasons for moderation
54 | decisions when appropriate.
55 | 
56 | ## Scope
57 | 
58 | This Code of Conduct applies within all community spaces, and also applies when
59 | an individual is officially representing the community in public spaces.
60 | Examples of representing our community include using an official email address,
61 | posting via an official social media account, or acting as an appointed
62 | representative at an online or offline event.
63 | 
64 | ## Enforcement
65 | 
66 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
67 | reported to the community leaders responsible for enforcement at
68 | yager@nicholasyager.com. All complaints will be reviewed and investigated
69 | promptly and fairly.
70 | 
71 | All community leaders are obligated to respect the privacy and security of the
72 | reporter of any incident.
73 | 
74 | ## Enforcement Guidelines
75 | 
76 | Community leaders will follow these Community Impact Guidelines in determining
77 | the consequences for any action they deem in violation of this Code of Conduct:
78 | 
79 | ### 1. 
Correction 80 | 81 | **Community Impact**: Use of inappropriate language or other behavior deemed 82 | unprofessional or unwelcome in the community. 83 | 84 | **Consequence**: A private, written warning from community leaders, providing 85 | clarity around the nature of the violation and an explanation of why the 86 | behavior was inappropriate. A public apology may be requested. 87 | 88 | ### 2. Warning 89 | 90 | **Community Impact**: A violation through a single incident or series 91 | of actions. 92 | 93 | **Consequence**: A warning with consequences for continued behavior. No 94 | interaction with the people involved, including unsolicited interaction with 95 | those enforcing the Code of Conduct, for a specified period of time. This 96 | includes avoiding interactions in community spaces as well as external channels 97 | like social media. Violating these terms may lead to a temporary or 98 | permanent ban. 99 | 100 | ### 3. Temporary Ban 101 | 102 | **Community Impact**: A serious violation of community standards, including 103 | sustained inappropriate behavior. 104 | 105 | **Consequence**: A temporary ban from any sort of interaction or public 106 | communication with the community for a specified period of time. No public or 107 | private interaction with the people involved, including unsolicited interaction 108 | with those enforcing the Code of Conduct, is allowed during this period. 109 | Violating these terms may lead to a permanent ban. 110 | 111 | ### 4. Permanent Ban 112 | 113 | **Community Impact**: Demonstrating a pattern of violation of community 114 | standards, including sustained inappropriate behavior, harassment of an 115 | individual, or aggression toward or disparagement of classes of individuals. 116 | 117 | **Consequence**: A permanent ban from any sort of public interaction within 118 | the community. 119 | 120 | ## Attribution 121 | 122 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 123 | version 2.0, available at 124 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 125 | 126 | Community Impact Guidelines were inspired by 127 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 128 | 129 | For answers to common questions about this code of conduct, see the FAQ at 130 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 131 | at [https://www.contributor-covenant.org/translations][translations]. 132 | 133 | [homepage]: https://www.contributor-covenant.org 134 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 135 | [Mozilla CoC]: https://github.com/mozilla/diversity 136 | [FAQ]: https://www.contributor-covenant.org/faq 137 | [translations]: https://www.contributor-covenant.org/translations 138 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to dbt-loom 2 | 3 | Thank you for taking the time to contribute! 🎉💃 4 | 5 | The following is a set of guidelines for contributing to dbt-loom. These are 6 | mostly guidelines, not rules. Use your best judgment, and feel free to propose 7 | changes to this document in a pull request. 8 | 9 | ## How Can I Contribute? 10 | 11 | ### Reporting Bugs 12 | 13 | If you find a bug, please report it by opening an issue on GitHub. Make sure to 14 | include: 15 | 16 | - A clear and descriptive title. 17 | - Steps to reproduce the problem. 
18 | - Expected behavior.
19 | - Actual behavior.
20 | - Any relevant logs or screenshots.
21 | 
22 | ### Suggesting Enhancements
23 | 
24 | If you have an idea to enhance dbt-loom, we'd love to hear about it! Please
25 | open an issue on GitHub and include:
26 | 
27 | - A clear and descriptive title.
28 | - A detailed description of the proposed enhancement.
29 | - Any relevant use cases or examples.
30 | 
31 | ### Pull Requests
32 | 
33 | When you're ready to start working on an issue, fork the repository and create
34 | a new branch for your work. Follow these steps:
35 | 
36 | 1. Fork the repository and clone your fork.
37 | 2. Create a new branch: `git checkout -b feature/my-feature-branch`.
38 | 3. Make your changes and commit them: `git commit -m 'Add some feature'`.
39 | 4. Push to the branch: `git push origin feature/my-feature-branch`.
40 | 5. Open a pull request.
41 | 
42 | After creating the pull request, the PR will automatically notify the
43 | maintainers, and they will be able to trigger CI checks for your change.
44 | 
45 | ### Code Style
46 | 
47 | - Follow the existing code style.
48 | - Ensure your code passes all tests, including mypy.
49 | - Write tests for your code if applicable.
50 | 
51 | ### Running Tests
52 | 
53 | Make sure all tests pass before submitting a pull request. You can run the
54 | tests with:
55 | 
56 | ```console
57 | pytest tests/
58 | ```
59 | 
60 | ### Documentation
61 | 
62 | Contributions to documentation are always welcome. If you see something that can be improved or needs clarification, feel free to make changes.
63 | 
64 | ## Code of Conduct
65 | 
66 | This project adheres to the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md).
67 | By participating, you are expected to uphold this code.
68 | 
69 | ## Getting Help
70 | 
71 | If you need help or have any questions, feel free to open an issue on GitHub.
72 | 
73 | Thank you for contributing!
74 | 
-------------------------------------------------------------------------------- /docs/advanced-configuration.md: --------------------------------------------------------------------------------
1 | # Advanced Configuration
2 | 
3 | `dbt-loom` also has a couple of advanced configuration options for power users.
4 | 
5 | ## Using environment variables in the `dbt-loom` config
6 | 
7 | You can easily incorporate your own environment variables into the config file. This allows for dynamic configuration values that can change based on the environment. To specify an environment variable in the `dbt-loom` config file, use one of the following formats:
8 | 
9 | `${ENV_VAR}` or `$ENV_VAR`
10 | 
11 | ### Example:
12 | 
13 | ```yaml
14 | manifests:
15 |   - name: revenue
16 |     type: gcs
17 |     config:
18 |       project_id: ${GCP_PROJECT}
19 |       bucket_name: ${GCP_BUCKET}
20 |       object_name: ${MANIFEST_PATH}
21 | ```
22 | 
23 | ## Exclude nested packages
24 | 
25 | In some circumstances, like running `dbt-project-evaluator`, you may not want a
26 | given package in an upstream project to be imported into a downstream project.
27 | You can prevent a downstream project from injecting assets from specific packages
28 | by adding each package's name to the `excluded_packages` list for that manifest.
29 | 
30 | ```yaml
31 | manifests:
32 |   - name: revenue
33 |     type: file
34 |     config:
35 |       path: ../revenue/target/manifest.json
36 |     excluded_packages:
37 |       # Provide the string name of the package to exclude during injection.
38 |       - dbt_project_evaluator
39 | ```
40 | 
41 | ## Gzipped files
42 | 
43 | `dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed
44 | with `.gz`.
45 | 
46 | ```yaml
47 | manifests:
48 |   - name: revenue
49 |     type: s3
50 |     config:
51 |       bucket_name: example_bucket_name
52 |       object_name: manifest.json.gz
53 | ```
54 | 
55 | ## Enabling Telemetry
56 | 
57 | By default, the `dbt-loom` plugin blocks outbound telemetry that reports on
58 | the use of this plugin. This is a privacy-preserving measure for `dbt-loom`
59 | users that does not impact the function of dbt-core and does not impede
60 | dbt-core development in any way. If you _want_ this telemetry to be sent, you
61 | can re-enable this behavior by setting the `enable_telemetry` property
62 | in the `dbt_loom.config.yml` file.
63 | 
64 | ```yaml
65 | enable_telemetry: true
66 | manifests: ...
67 | ```
68 | 
-------------------------------------------------------------------------------- /docs/getting-started.md: --------------------------------------------------------------------------------
1 | # Getting Started
2 | 
3 | To begin, install the `dbt-loom` python package.
4 | 
5 | ```console
6 | pip install dbt-loom
7 | ```
8 | 
9 | Next, create a `dbt-loom` configuration file. This configuration file provides the paths for your
10 | upstream projects' manifest files.
11 | 
12 | ```yaml
13 | manifests:
14 |   - name: project_name # This should match the project's real name
15 |     type: file
16 |     config:
17 |       # A path to your manifest. This can be either a local path, or a remote
18 |       # path accessible via http(s).
19 |       path: path/to/manifest.json
20 | ```
21 | 
22 | By default, `dbt-loom` will look for `dbt_loom.config.yml` in your working directory. You can also set the
23 | `DBT_LOOM_CONFIG` environment variable to point at a configuration file elsewhere.
24 | 
25 | ## Using dbt Cloud as an artifact source
26 | 
27 | You can use dbt-loom to fetch model definitions from dbt Cloud by setting up a `dbt_cloud` manifest in your `dbt-loom` config, and setting the `DBT_CLOUD_API_TOKEN` environment variable in your execution environment.
28 | 
29 | ```yaml
30 | manifests:
31 |   - name: project_name
32 |     type: dbt_cloud
33 |     config:
34 |       account_id:
35 | 
36 |       # Job ID pertains to the job that you'd like to fetch artifacts from.
37 |       job_id:
38 | 
39 |       api_endpoint:
40 |       # dbt Cloud has multiple regions with different URLs. Update this to
41 |       # your appropriate dbt cloud endpoint.
42 | 
43 |       step:
44 |       # If your job generates multiple artifacts, you can set the step from
45 |       # which to fetch artifacts. Defaults to the last step.
46 | ```
47 | 
48 | ## Using an S3-compatible object store as an artifact source
49 | 
50 | You can use dbt-loom to fetch manifest files from S3-compatible object stores
51 | by setting up an `s3` manifest in your `dbt-loom` config. Please note that this
52 | approach supports all standard boto3-compatible environment variables and authentication mechanisms; see the [boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#environment-variables) for more details.
53 | 
54 | ```yaml
55 | manifests:
56 |   - name: project_name
57 |     type: s3
58 |     config:
59 |       bucket_name:
60 |       # The name of the bucket where your manifest is stored.
61 | 
62 |       object_name:
63 |       # The object name of your manifest file.
64 | ```
65 | 
66 | ## Using GCS as an artifact source
67 | 
68 | You can use dbt-loom to fetch manifest files from Google Cloud Storage by setting up a `gcs` manifest in your `dbt-loom` config.
69 | 
70 | ```yaml
71 | manifests:
72 |   - name: project_name
73 |     type: gcs
74 |     config:
75 |       project_id:
76 |       # The alphanumeric ID of the GCP project that contains your target bucket.
77 | 
78 |       bucket_name:
79 |       # The name of the bucket where your manifest is stored.
80 | 
81 |       object_name:
82 |       # The object name of your manifest file.
83 | 
84 |       credentials:
85 |       # The OAuth2 Credentials to use. If not passed, falls back to the default inferred from the environment.
86 | ```
87 | 
88 | ## Using Azure Storage as an artifact source
89 | 
90 | You can use dbt-loom to fetch manifest files from Azure Storage
91 | by setting up an `azure` manifest in your `dbt-loom` config. The `azure` type implements
92 | the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python)
93 | class, supporting all environment variables and authentication mechanisms.
94 | Alternatively, set the `AZURE_STORAGE_CONNECTION_STRING` environment variable to
95 | authenticate via a connection string.
96 | 
97 | ```yaml
98 | manifests:
99 |   - name: project_name
100 |     type: azure
101 |     config:
102 |       account_name: # The name of your Azure Storage account
103 |       container_name: # The name of your Azure Storage container
104 |       object_name: # The object name of your manifest file.
105 | ```
106 | 
107 | ## Using Snowflake Stage as an artifact source
108 | 
109 | You can use dbt-loom to fetch manifest files from Snowflake Stage by setting up a `snowflake` manifest in your `dbt-loom` config. Please note that this only
110 | works for dbt-core versions 1.8.0 and newer.
111 | 
112 | ```yaml
113 | manifests:
114 |   - name: project_name
115 |     type: snowflake
116 |     config:
117 |       stage: stage_name # Stage name, can include Database/Schema
118 |       stage_path: path/to/dbt/manifest.json # Path to manifest file in the stage
119 | ```
120 | 
-------------------------------------------------------------------------------- /docs/index.md: --------------------------------------------------------------------------------
1 | # dbt-loom
2 | 
3 | `dbt-loom` is a dbt Core plugin that weaves together multi-project deployments. It works by fetching public model definitions from your dbt artifacts, and injecting those models into your dbt project.
4 | 
5 | ```mermaid
6 | flowchart LR
7 | 
8 |     subgraph TOP[Your Infrastructure]
9 |         direction TB
10 |         dbt_runtime[dbt Core]
11 |         proprietary_plugin[Open Source Metadata Plugin]
12 | 
13 |         files[Local and Remote Files]
14 |         object_storage[Object Storage]
15 |         discovery_api[dbt Cloud APIs]
16 | 
17 |         discovery_api --> proprietary_plugin
18 |         files --> proprietary_plugin
19 |         object_storage --> proprietary_plugin
20 |         proprietary_plugin --> dbt_runtime
21 |     end
22 | 
23 |     Project --> TOP --> Warehouse
24 | ```
25 | 
26 | dbt-loom currently supports obtaining model definitions from:
27 | 
28 | - Local manifest files
29 | - Remote manifest files via http(s)
30 | - dbt Cloud
31 | - GCS
32 | - S3-compatible object storage services
33 | - Azure Storage
34 | - Snowflake Stage
35 | 
36 | ## How does it work?
37 | 
38 | As of dbt-core 1.6.0-b8, there exists a `dbtPlugin` class which defines functions that can
39 | be called by dbt-core's `PluginManager`. During different parts of the dbt-core lifecycle (such as graph linking and
40 | manifest writing), the `PluginManager` will be called and all plugins registered with the appropriate hook will be executed.
41 | 
42 | dbt-loom implements a `get_nodes` hook, and uses a configuration file to parse manifests, identify public models, and
43 | inject those public models when called by `dbt-core`.
44 | 
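For illustration, here is a minimal sketch of such a plugin. It assumes dbt-core's `dbt.plugins` interfaces (`dbtPlugin`, `dbt_hook`, `PluginNodes`, and `ModelNodeArgs`); the `ExampleLoomPlugin` class and the `UPSTREAM_PUBLIC_MODELS` list are hypothetical stand-ins for what dbt-loom actually derives from the manifests named in its configuration file.

```python
from dbt.plugins.manager import dbt_hook, dbtPlugin
from dbt.plugins.manifest import ModelNodeArgs, PluginNodes

# Hypothetical stand-in for the public models parsed out of an upstream
# project's manifest.json.
UPSTREAM_PUBLIC_MODELS = [
    {
        "name": "orders",
        "package_name": "revenue",
        "identifier": "orders",
        "schema": "analytics",
    },
]


class ExampleLoomPlugin(dbtPlugin):
    """Sketch: inject upstream public models into the current project."""

    @dbt_hook
    def get_nodes(self) -> PluginNodes:
        nodes = PluginNodes()
        for model in UPSTREAM_PUBLIC_MODELS:
            nodes.add_model(ModelNodeArgs(**model))
        return nodes
```

When the hook fires, the returned `PluginNodes` are merged into the project's manifest, which is what allows a downstream `ref('revenue', 'orders')` to resolve.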
45 | ## Known Caveats
46 | 
47 | Cross-project dependencies are a relatively new development, and dbt-core plugins
48 | are still in beta. As such, there are a number of caveats to be aware of when using
49 | this tool.
50 | 
51 | 1. dbt plugins are only supported in dbt-core version 1.6.0-b8 and newer. This means you must be using a dbt adapter
52 |    compatible with this version.
53 | 2. `PluginNodeArgs` are not fully-realized dbt `ManifestNode`s, so documentation generated by `dbt docs generate` may
54 |    be sparse when viewing injected models.
55 | 
-------------------------------------------------------------------------------- /mkdocs.yml: --------------------------------------------------------------------------------
1 | site_name: dbt_loom
2 | 
3 | theme:
4 |   palette:
5 |     # Palette toggle for light mode
6 |     - media: "(prefers-color-scheme: light)"
7 |       scheme: default
8 |       primary: custom
9 |       accent: custom
10 |       toggle:
11 |         icon: material/brightness-7
12 |         name: Switch to dark mode
13 | 
14 |     # Palette toggle for dark mode
15 |     - media: "(prefers-color-scheme: dark)"
16 |       scheme: slate
17 |       primary: custom
18 |       accent: custom
19 |       toggle:
20 |         icon: material/brightness-4
21 |         name: Switch to light mode
22 | 
23 |   # primary: black
24 |   name: material
25 |   features:
26 |     - navigation.footer
27 |     - navigation.instant
28 |     - navigation.tracking
29 |     - content.action.edit
30 |     - toc.integrate # check feedback
31 | 
32 | extra:
33 |   version:
34 |     provider: mike
35 | 
36 | markdown_extensions:
37 |   - attr_list # needed to allow providing width
38 |   - md_in_html # to allow Markdown in details
39 |   - toc:
40 |       toc_depth: 3
41 |       permalink: "#"
42 |   - pymdownx.highlight:
43 |       anchor_linenums: true
44 |       line_spans: __span
45 |       pygments_lang_class: true
46 |   - pymdownx.inlinehilite
47 |   - pymdownx.snippets
48 |   - pymdownx.superfences:
49 |       custom_fences:
50 |         - name: mermaid
51 |           class: mermaid
52 |           format: !!python/name:pymdownx.superfences.fence_code_format
53 |   - pymdownx.details # allow collapsible blocks
54 |   - admonition
55 | 
56 | repo_url: https://github.com/nicholasyager/dbt-loom
57 | repo_name: nicholasyager/dbt-loom
58 | edit_uri: edit/main/docs/
59 | 
60 | nav:
61 |   - Home: index.md
62 |   - Getting started: getting-started.md
63 |   - Advanced configuration: advanced-configuration.md
64 | 
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "dbt-loom"
3 | version = "0.8.0"
4 | description = "A dbt-core plugin to import public nodes in multi-project deployments."
5 | authors = ["Nicholas Yager "] 6 | readme = "README.md" 7 | packages = [{ include = "dbt_loom" }] 8 | 9 | [tool.commitizen] 10 | version = "0.8.0" 11 | version_files = ["pyproject.toml:^version"] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.9,<4.0" 15 | dbt-core = ">=1.6.0,<1.10.0" 16 | requests = "^2.31.0" 17 | google-cloud-storage = "^2.13.0" 18 | boto3 = "^1.28.84" 19 | azure-storage-blob = "^12.19.0" 20 | azure-identity = "^1.15.0" 21 | types-pyyaml = "^6.0.12.12" 22 | types-networkx = "^3.2.1.20240313" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | ruff = "^0.3.0" 26 | pytest = "^7.4.0" 27 | isort = "^5.12.0" 28 | dbt-duckdb = ">=1.6.0,<1.10.0" 29 | duckdb = ">=0.8.0" 30 | pre-commit = "^3.6.0" 31 | mypy = "^1.8.0" 32 | 33 | [tool.poetry.extras] 34 | snowflake = ["dbt-snowflake"] 35 | 36 | [tool.poetry.group.docs.dependencies] 37 | mkdocs-material = "^9.5.45" 38 | mike = "^2.1.3" 39 | 40 | [tool.ruff] 41 | line-length = 88 42 | 43 | [tool.isort] 44 | force_grid_wrap = 0 # Resolve conflict with Black 45 | line_length = 88 # Comply with Ruff and Black 46 | 47 | [tool.pytest.ini_options] 48 | testpaths = ["tests"] 49 | 50 | [build-system] 51 | requires = ["poetry-core"] 52 | build-backend = "poetry.core.masonry.api" 53 | -------------------------------------------------------------------------------- /test_projects/customer_success/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | *.duckdb 6 | *.duckdb.wal 7 | reports/sources/*.csv 8 | .meltano 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /test_projects/customer_success/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: requirements-txt-fixer 9 | - repo: https://github.com/charliermarsh/ruff-pre-commit 10 | rev: v0.0.245 11 | hooks: 12 | - id: ruff 13 | args: [--fix, --exit-non-zero-on-fix] 14 | - repo: https://github.com/pre-commit/mirrors-eslint 15 | rev: v8.34.0 16 | hooks: 17 | - id: eslint 18 | - repo: https://github.com/sqlfluff/sqlfluff 19 | rev: "2.0.0a4" 20 | hooks: 21 | - id: sqlfluff-lint 22 | additional_dependencies: 23 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 24 | - id: sqlfluff-fix 25 | additional_dependencies: 26 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 27 | - repo: https://github.com/psf/black 28 | rev: "23.1.0" 29 | hooks: 30 | - id: black 31 | # - repo: https://github.com/pre-commit/mirrors-prettier 32 | # rev: "" # Use the sha or tag you want to point at 33 | # hooks: 34 | # - id: prettier 35 | -------------------------------------------------------------------------------- /test_projects/customer_success/.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | dialect = duckdb 3 | templater = dbt 4 | runaway_limit = 10 5 | max_line_length = 80 6 | indent_unit = space 7 | 8 | [sqlfluff:indentation] 9 | tab_space_size = 4 10 | 11 | [sqlfluff:layout:type:comma] 12 | spacing_before = touch 13 | line_position = trailing 14 | 15 | [sqlfluff:rules:capitalisation.keywords] 16 | capitalisation_policy = lower 17 | 18 | [sqlfluff:rules:aliasing.table] 19 | aliasing = explicit 20 | 21 | [sqlfluff:rules:aliasing.column] 22 | aliasing = 
explicit
23 | 
24 | [sqlfluff:rules:aliasing.expression]
25 | allow_scalar = False
26 | 
27 | [sqlfluff:rules:capitalisation.identifiers]
28 | extended_capitalisation_policy = lower
29 | 
30 | [sqlfluff:rules:capitalisation.functions]
31 | capitalisation_policy = lower
32 | 
33 | [sqlfluff:rules:capitalisation.literals]
34 | capitalisation_policy = lower
35 | 
36 | [sqlfluff:rules:ambiguous.column_references] # Number in group by
37 | group_by_and_order_by_style = implicit
38 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/.sqlfluffignore: --------------------------------------------------------------------------------
1 | reports
2 | target
3 | dbt_packages
4 | macros
5 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/README.md: --------------------------------------------------------------------------------
1 | # 🥪 The Jaffle Shop 🦘
2 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/dbt-labs/jaffle-shop-template?quickstart=1)
3 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/dbt-labs/jaffle-shop-template)
4 | 
5 | This is a template for creating a fully functional dbt project for teaching, learning, writing, demoing, or any other scenarios where you need a basic project with a synthesized jaffle shop business. We recommend beginners use the following steps to open this project right here on GitHub in a Codespace. If you're a little more experienced with devcontainers and want to go faster 🏎️, you can use the Gitpod link above for a quicker startup and deeper feature set.
6 | 
7 | ## How to use
8 | 
9 | ### 1. Click the big green 'Use this template' button and 'Create a new repository'.
10 | 
11 | ![Click use template](.github/static/use-template.gif)
12 | 
13 | This will create a new repository exactly like this one, and navigate you there. Make sure to execute the next instructions in that repo.
14 | 
15 | ### 2. Click 'Code', then 'Codespaces', then 'Create codespace on main'.
16 | 
17 | ![Create codespace on main](.github/static/open-codespace.gif)
18 | 
19 | This will create a new `codespace`, a sandboxed devcontainer with everything you need for a dbt project. Once the codespace is finished setting up, you'll be ready to run a `dbt build`.
20 | 
21 | ### 3. Make sure to wait until the codespace is finished setting up.
22 | 
23 | ![Codespaces setup screen at postCreateCommand](.github/static/codespaces-setup-screen.png)
24 | 
25 | After the container is built and connected to, VSCode will run a few clean up commands and then a `postCreateCommand`, a set of commands run after the container is set up. This is where we install our dependencies, such as dbt, the duckdb adapter, and other necessities, as well as run `dbt deps` to install the dbt packages we want to use. That screen will look something like the above. When it's completed it will close and leave you in a fresh terminal prompt. From there you're ready to do some analytics engineering!
26 | 
27 | ## Additional included tools
28 | 
29 | This template includes two additional tools for the other parts of the stack to create a more realistic experience:
30 | 
31 | - BI reporting built with [Evidence](https://evidence.dev) - an open source, code-based BI tool to write reports with markdown and SQL.
32 | - EL with [Meltano](https://meltano.com/) - an open source tool that provides a CLI & version control for ELT pipelines.
33 | 
34 | ### Evidence
35 | 
36 | With Evidence you can:
37 | 
38 | - Version control your BI layer
39 | - Build reports in the same repo as your dbt project
40 | - Deploy your reports to a static site
41 | 
42 | #### Running Evidence
43 | 
44 | To run Evidence, use:
45 | 
46 | ```shell
47 | cd reports
48 | npm run dev
49 | ```
50 | 
51 | See the [Evidence CLI docs](https://docs.evidence.dev/cli) for more details.
52 | 
53 | You can make changes to the markdown pages in the `reports/pages` folder and see the reports update in the browser preview.
54 | 
55 | #### Learning More about Evidence
56 | 
57 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence)
58 | - [Project Home Page](https://www.evidence.dev)
59 | - [Github](https://github.com/evidence-dev/evidence)
60 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases)
61 | 
62 | ### Meltano
63 | 
64 | This project is preconfigured with Meltano, which can be used to extract and load raw data into DuckDB.
65 | 
66 | #### Run EL (Extract and Load) using Meltano
67 | 
68 | ```console
69 | meltano run tap-jaffle-shop target-duckdb
70 | ```
71 | 
72 | Optionally, you can modify extract parameters using environment variables. For instance, this modified version will extract five years of data instead of the default one year.
73 | 
74 | ```console
75 | export TAP_JAFFLE_SHOP_YEARS=5
76 | meltano run tap-jaffle-shop target-duckdb
77 | ```
78 | 
79 | You can also modify any tap or target config with the interactive `config` command:
80 | 
81 | ```console
82 | meltano config tap-jaffle-shop set --interactive
83 | meltano config target-duckdb set --interactive
84 | ```
85 | 
86 | ## Local development
87 | 
88 | This project is optimized for running in a container. If you'd like to use it locally outside of a container, you'll need to follow the instructions below.
89 | 
90 | 1. Create a python virtual environment and install the dependencies.
91 | 
92 | ```console
93 | python3 -m venv .venv
94 | source .venv/bin/activate
95 | pip install -r requirements.txt
96 | ```
97 | 
98 | 2. Install meltano with [pipx](https://pypa.github.io/pipx/installation/), and install meltano's dependencies.
99 | 
100 | ```console
101 | pipx install meltano
102 | meltano install
103 | ```
104 | 
105 | 3. Run the EL pipeline.
106 | 
107 | ```console
108 | meltano run el
109 | ```
110 | 
111 | 4. Install dbt dependencies and build the dbt project.
112 | 
113 | ```console
114 | dbt deps
115 | dbt build
116 | ```
117 | 
118 | 5. Install Evidence dependencies and run the Evidence server.
119 | 
120 | ```console
121 | cd reports
122 | npm install
123 | npm run dev
124 | ```
125 | 
126 | ## Contributing
127 | 
128 | We welcome issues and PRs requesting or adding new features. The package that generates the synthetic data, [`jafgen`](https://pypi.org/project/jafgen/), is also under active development, and will add more types of source data to model as we go along. If you have tests, descriptions, new models, metrics, materialization types, or techniques you use this repo to demonstrate, which you feel would make for a more expansive baseline experience, we encourage you to consider contributing them back in so that this project becomes an even better collective tool for exploring and learning dbt over time.
129 | -------------------------------------------------------------------------------- /test_projects/customer_success/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | tasks: 4 | deps: 5 | cmds: 6 | - python -m pip install --progress-bar off -r requirements.txt 7 | - dbt deps 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/analyses/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/dbt_loom.config.yml: -------------------------------------------------------------------------------- 1 | manifests: 2 | - name: potato 3 | type: file 4 | config: 5 | path: ../revenue/target/manifest.json 6 | excluded_packages: 7 | - dbt_project_evaluator 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "customer_success" 5 | version: "1.0.0" 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: "customer_success" 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | vars: 27 | truncate_timespan_to: "{{ current_timestamp() }}" 28 | 29 | # Configuring models 30 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 31 | 32 | # In this example config, we tell dbt to build all models in the example/ directory 33 | # as tables. These settings can be overridden in the individual model files 34 | # using the `{{ config(...) }}` macro. 
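# For example (hypothetical override), an individual model can opt out of the
# directory-level defaults declared below:
#
#   -- in models/marts/some_model.sql
#   {{ config(materialized='view') }}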
35 | 36 | models: 37 | customer_success: 38 | staging: 39 | +materialized: view 40 | marts: 41 | +materialized: table 42 | -------------------------------------------------------------------------------- /test_projects/customer_success/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/macros/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/macros/cents_to_dollars.sql: -------------------------------------------------------------------------------- 1 | {# A basic example for a project-wide macro to cast a column uniformly #} 2 | 3 | {% macro cents_to_dollars(column_name, precision=2) -%} 4 | ({{ column_name }} / 100)::numeric(16, {{ precision }}) 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /test_projects/customer_success/meltano.yml: -------------------------------------------------------------------------------- 1 | # Meltano Configuration File 2 | # 3 | # Sample usage: 4 | # > meltano run tap-jaffle-shop target-duckdb 5 | # 6 | # Or equivalently: 7 | # > meltano run el # Run the job named 'el' to extract and load data 8 | 9 | version: 1 10 | project_id: Jaffle Shop Template Project 11 | 12 | env: 13 | JAFFLE_DB_NAME: jaffle_shop 14 | JAFFLE_RAW_SCHEMA: jaffle_raw 15 | 16 | default_environment: dev 17 | environments: 18 | - name: dev 19 | 20 | plugins: 21 | extractors: 22 | - name: tap-jaffle-shop 23 | namespace: tap_jaffle_shop 24 | variant: meltanolabs 25 | pip_url: git+https://github.com/MeltanoLabs/tap-jaffle-shop.git@v0.3.0 26 | capabilities: 27 | - catalog 28 | - discover 29 | config: 30 | years: 2 31 | stream_name_prefix: ${JAFFLE_RAW_SCHEMA}-raw_ 32 | loaders: 33 | - name: target-postgres 34 | variant: datamill-co 35 | pip_url: git+https://github.com/datamill-co/target-postgres.git@v0.1.0 36 | config: 37 | host: 127.0.0.1 38 | user: postgres 39 | password: ${SNOWFLAKE_PASSWORD} 40 | default_target_schema: $JAFFLE_RAW_SCHEMA 41 | 42 | jobs: 43 | # Sample usage: `meltano run el` 44 | # Equivalent to: `meltano run tap-jaffle-shop target-duckdb` 45 | - name: el # Extract and load the raw data 46 | tasks: 47 | - tap-jaffle-shop target-postgres 48 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: customers 5 | description: Customer overview data mart, offering key details for each unique customer. One row per customer. 6 | columns: 7 | - name: customer_id 8 | description: The unique key of the orders mart. 9 | tests: 10 | - not_null 11 | - unique 12 | - name: customer_name 13 | description: Customers' full name. 14 | - name: count_lifetime_orders 15 | description: Total number of orders a customer has ever placed. 16 | - name: first_ordered_at 17 | description: The timestamp when a customer placed their first order. 18 | - name: last_ordered_at 19 | description: The timestamp of a customer's most recent order. 20 | - name: lifetime_spend_pretax 21 | description: The sum of all the pre-tax subtotals of every order a customer has placed. 22 | - name: lifetime_spend 23 | description: The sum of all the order totals (including tax) that a customer has ever placed. 
24 |       - name: customer_type
25 |         description: Options are 'new' or 'returning', indicating if a customer has ordered more than once or has only placed their first order to date.
26 |         tests:
27 |           - accepted_values:
28 |               values: ["new", "returning"]
29 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/customer_status_histories.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | 
4 | def model(dbt, session):
5 |     # length of time considered a churn (currently an unused placeholder)
6 |     churn_window = pd.Timedelta(days=2)  # noqa: F841
7 | 
8 |     dbt.config(enabled=False, materialized="table", packages=["pandas==1.5.2"])
9 | 
10 |     orders_relation = dbt.ref("orders")
11 | 
12 |     # converting a DuckDB Python Relation into a pandas DataFrame
13 |     orders_df = orders_relation.df()
14 | 
15 |     orders_df.sort_values(by="ordered_at", inplace=True)
16 |     orders_df["previous_order_at"] = orders_df.groupby("customer_id")[
17 |         "ordered_at"
18 |     ].shift(1)
19 |     orders_df["next_order_at"] = orders_df.groupby("customer_id")["ordered_at"].shift(
20 |         -1
21 |     )
22 |     return orders_df
23 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/customers.sql: --------------------------------------------------------------------------------
1 | {{
2 |     config(
3 |         materialized='table'
4 |     )
5 | }}
6 | 
7 | with
8 | 
9 | customers as (
10 | 
11 |     select * from {{ ref('stg_customers') }}
12 | 
13 | ),
14 | 
15 | orders_mart as (
16 | 
17 |     select * from {{ ref('revenue', 'orders') }}
18 | 
19 | ),
20 | 
21 | integers as (
22 |     select * from {{ ref('revenue', 'integers') }}
23 | ),
24 | 
25 | order_summary as (
26 | 
27 |     select
28 |         customer_id,
29 | 
30 |         count(*) as count_lifetime_orders,
31 |         count(*) > 1 as is_repeat_buyer,
32 |         min(ordered_at) as first_ordered_at,
33 |         max(ordered_at) as last_ordered_at,
34 | 
35 |         sum(subtotal) as lifetime_spend_pretax,
36 |         sum(order_total) as lifetime_spend
37 | 
38 |     from orders_mart
39 |     group by 1
40 | 
41 | ),
42 | 
43 | joined as (
44 | 
45 |     select
46 |         customers.*,
47 |         order_summary.count_lifetime_orders,
48 |         order_summary.first_ordered_at,
49 |         order_summary.last_ordered_at,
50 |         order_summary.lifetime_spend_pretax,
51 |         order_summary.lifetime_spend,
52 | 
53 |         case
54 |             when order_summary.is_repeat_buyer then 'returning'
55 |             else 'new'
56 |         end as customer_type
57 | 
58 |     from customers
59 | 
60 |     left join order_summary
61 |         on customers.customer_id = order_summary.customer_id
62 | 
63 | )
64 | 
65 | select * from joined
66 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/__models.yml: --------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | models:
4 |   - name: stg_customers
5 |     description: Customer data with basic cleaning and transformation applied, one row per customer.
6 |     columns:
7 |       - name: customer_id
8 |         description: The unique key for each customer.
9 | tests: 10 | - not_null 11 | - unique 12 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: ecom 5 | schema: raw 6 | description: E-commerce data 7 | tables: 8 | - name: raw_customers 9 | meta: 10 | external_location: "read_csv('jaffle-data/raw_customers.csv', names=['id', 'name'],AUTO_DETECT=TRUE)" 11 | description: One record per person who has purchased one or more items 12 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | source as ( 4 | 5 | select * from {{ source('ecom', 'raw_customers') }} 6 | 7 | ), 8 | 9 | renamed as ( 10 | 11 | select 12 | 13 | ---------- ids 14 | id as customer_id, 15 | 16 | ---------- properties 17 | name as customer_name 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /test_projects/customer_success/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | sha1_hash: efa9169fb1f1a1b2c967378c02b60e3d85ae464b 5 | -------------------------------------------------------------------------------- /test_projects/customer_success/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | -------------------------------------------------------------------------------- /test_projects/customer_success/profiles.yml: -------------------------------------------------------------------------------- 1 | customer_success: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ../database.db 6 | threads: 4 7 | target: dev 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/.evidence/customization/custom-formatting.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "customFormats": [] 4 | } -------------------------------------------------------------------------------- /test_projects/customer_success/reports/.gitignore: -------------------------------------------------------------------------------- 1 | .evidence/template 2 | .svelte-kit 3 | build 4 | node_modules 5 | .DS_Store 6 | *.*duckdb 7 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/README.md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop BI Reports 2 | 3 | This project uses [Evidence.dev](https://Evidence.dev) for BI reporting. 4 | 5 | ## Getting Started 6 | 7 | Run the BI server from your local workstation: 8 | 9 | ```shell 10 | cd reports 11 | npm run dev 12 | ``` 13 | 14 | This will launch the Evidence webserver in developer mode. 15 | 16 | ## Testing for breakages 17 | 18 | The following command can be used to confirm that reports and queries are still valid: 19 | 20 | ```console 21 | npm run build:strict 22 | ``` 23 | 24 | ## Updating to the latest version of Evidence 25 | 26 | 1. 
Check your version against the version number for the [latest release](https://github.com/evidence-dev/evidence/releases).
27 | 2. Run `npm install @evidence-dev/evidence@latest` to bump the version in `package.json` and automatically update dependencies in `package-lock.json`.
28 | 
29 | ## Learning More
30 | 
31 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence)
32 | - [Project Home Page](https://www.evidence.dev)
33 | - [Github](https://github.com/evidence-dev/evidence)
34 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases)
35 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/reports/package.json: --------------------------------------------------------------------------------
1 | {
2 |   "name": "jaffle-shop",
3 |   "version": "0.0.1",
4 |   "scripts": {
5 |     "build": "evidence build",
6 |     "build:strict": "evidence build:strict",
7 |     "dev": "evidence dev --host 0.0.0.0 --open /",
8 |     "test": "evidence build",
9 |     "help": "evidence --help"
10 |   },
11 |   "engines": {
12 |     "npm": ">=7.0.0",
13 |     "node": ">=16.14.0"
14 |   },
15 |   "type": "module",
16 |   "dependencies": {
17 |     "@evidence-dev/evidence": "15.0.1",
18 |     "@evidence-dev/preprocess": "2.2.0",
19 |     "@evidence-dev/components": "2.2.1"
20 |   },
21 |   "overrides": {
22 |     "jsonwebtoken": "9.0.0",
23 |     "trim@<0.0.3": ">0.0.3",
24 |     "sqlite3": "5.1.5"
25 |   }
26 | }
27 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/analysis/seasonality-investigation.md: --------------------------------------------------------------------------------
1 | # Seasonality Investigation
2 | *Written by Melissa Cranston in September 2017*
3 | 
4 | *Analysis covers the time period of September 2016 to August 2017. All queries have been limited to that range.*
5 | 
6 | [Jump to conclusions & recommendations ↓](#conclusions)
7 | 
8 | ## Variations in Order Volume
9 | Plotting orders per day for the last 12 months reveals 3 things:
10 | - An unnaturally large jump in orders per day in March 2017 - this was driven by the new store opening in [Brooklyn](/stores/Brooklyn)
11 | - A repeating pattern of spikes which might be driven by different order volumes on specific days of the week
12 | - A drop in total orders per day around June 2017
13 | 
14 | ```orders_per_day
15 | select
16 |     date_trunc('day', ordered_at) as date,
17 |     count(*) as orders
18 | 
19 | from analytics.orders
20 | where ordered_at between '2016-09-01' and '2017-08-31'
21 | 
22 | group by 1
23 | order by 1
24 | ```
25 | 
26 | 
33 | 
34 | ## Day of Week
35 | We can calculate average orders by day of week to check if there are differences in order volume across days.
36 | 
37 | ```orders_by_weekday
38 | select
39 |     date_part('dayofweek', date) as day_of_week_num,
40 |     dayname(date) as day_of_week,
41 |     avg(orders) as avg_orders
42 | from ${orders_per_day}
43 | group by 1, 2
44 | order by day_of_week_num
45 | ```
46 | 
47 | 
55 | 
56 | This reveals that weekdays generate significantly higher order volume than weekends. It also shows that orders are fairly consistent across individual days on weekdays (202-209 orders/day) and weekends (~50 orders/day).
57 | 
58 | ## Hour of Day
59 | Now we'll break down orders by hour of day to see if there are patterns within days. Given the differences we just found between weekday and weekend volumes, we should split the results by those day types.
We can use a loop for this. 60 | 61 | ```orders_hour_of_day 62 | with 63 | orders_by_hour as ( 64 | select 65 | date_part('hour', ordered_at) as hour_of_day, 66 | if(dayname(ordered_at) in ('Sunday', 'Saturday'), 'Weekend', 'Weekday') as day_type, 67 | count(*)::float as orders, 68 | count(distinct date_trunc('day', ordered_at)) as days 69 | from analytics.orders 70 | where ordered_at between '2016-09-01' and '2017-08-31' 71 | group by 1, 2 72 | order by hour_of_day 73 | ) 74 | 75 | select 76 | *, 77 | orders / days as orders_per_hour 78 | from orders_by_hour 79 | ``` 80 | 81 | {#each ['Weekday', 'Weekend'] as day_type} 82 | 83 | d.day_type === day_type)} 85 | x=hour_of_day 86 | y=orders_per_hour 87 | yAxisTitle=true 88 | xAxisTitle=true 89 | yMax=60 90 | title="{day_type} - Orders by Hour of Day" 91 | /> 92 | 93 | {/each} 94 | 95 | We see a significant peak in order volume between 7 and 9am on weekdays. There is also a slight increase in volume around lunch times (12-2pm) across all days of the week. 96 | 97 | ## Dayparts 98 | Based on the volumes shown above, we can break down our dayparts as: 99 | - Breakfast: 7-9am 100 | - Late Morning: 9am-12pm 101 | - Lunch: 12-2pm 102 | - Late Afternoon: 2-5pm 103 | 104 | In future analyses, these timeframes should be lined up with any existing operational timeframes (e.g., breakfast, lunch service windows). 105 | 106 | ```dayparts 107 | with 108 | orders_add_daypart as ( 109 | select 110 | *, 111 | case 112 | when hour_of_day between 7 and 8 then 'Breakfast' 113 | when hour_of_day between 9 and 11 then 'Late Morning' 114 | when hour_of_day between 12 and 14 then 'Lunch' 115 | when hour_of_day between 15 and 24 then 'Late Afternoon' 116 | end as daypart 117 | from ${orders_hour_of_day} 118 | ), 119 | 120 | orders_by_daypart as ( 121 | select 122 | daypart, 123 | day_type, 124 | sum(orders) / sum(days) as orders_per_hour, 125 | sum(orders) as orders 126 | from orders_add_daypart 127 | group by daypart, day_type 128 | ) 129 | 130 | select 131 | *, 132 | orders / sum(orders) over () as orders_pct1 133 | from orders_by_daypart 134 | ``` 135 | 136 | 145 | 146 | Almost half of all orders are generated from breakfast on weekdays. This might be driven by orders from customers who are on their way to work - a follow-up analysis on customer purchasing behaviour should be completed to investigate this. 
147 | 148 | ## Conclusions 149 | - Weekdays generate significantly more orders than weekend days (~4x more orders on an average weekday compared to an average weekend day) 150 | - Early mornings (7-9am) on weekdays generate almost half of all orders for the company 151 | - There was a drop in orders in June 2017 - this has not been covered in this analysis, but should be investigated 152 | 153 | ### Recommended Follow-on Analyses 154 | - Investigate drop in orders in June 2017 155 | - Study customer purchasing behaviour, especially during weekday early mornings 156 | - Extend this analysis with a longer timeframe to investigate seasonality throughout the calendar year 157 | 158 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/customers/[customer].md: -------------------------------------------------------------------------------- 1 | # {$page.params.customer}'s Customer Profile 2 | 3 | ```customers 4 | select 5 | *, 6 | first_ordered_at as first_order_longdate, 7 | last_ordered_at as last_order_longdate, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | ``` 12 | 13 | {$page.params.customer} has been a customer since d.customer_name === $page.params.customer)} column=first_order_longdate/>, with their most recent order occurring on d.customer_name === $page.params.customer)} column=last_order_longdate/>. 14 | 15 | ### Key stats: 16 | - d.customer_name === $page.params.customer)} column=count_lifetime_orders/> lifetime orders 17 | - d.customer_name === $page.params.customer)} column=lifetime_spend_usd/> in lifetime spend 18 | - d.customer_name === $page.params.customer)} column=average_order_value_usd/> average order value 19 | 20 | ```monthly_purchases 21 | select 22 | date_trunc('month', a.ordered_at) as month, 23 | b.customer_name, 24 | sum(a.order_total) as purchases_usd 25 | from analytics.orders a 26 | left join analytics.customers b 27 | on a.customer_id = b.customer_id 28 | group by month, customer_name 29 | order by month asc 30 | ``` 31 | 32 | d.customer_name === $page.params.customer)} 34 | x=month 35 | y=purchases_usd 36 | title="Purchases per Month by {$page.params.customer}" 37 | /> -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/customers/index.md: -------------------------------------------------------------------------------- 1 | # Customers 2 | 3 | ```customers 4 | select 5 | customer_name, 6 | concat('/customers/', customer_name) as customer_link, 7 | count_lifetime_orders as lifetime_orders, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | order by lifetime_spend_usd desc 12 | ``` 13 | 14 | Click a row to see the report for that customer: 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Jaffle Shop 🥪 2 | 3 | ```monthly_stats 4 | with 5 | monthly_stats as ( 6 | select 7 | date_trunc('month', ordered_at) as month, 8 | sum(order_total) as revenue_usd1k, 9 | count(*)::float as orders, 10 | count(distinct customer_id)::float as customers 11 | 12 | from analytics.orders 13 | group by month 14 | order by month desc 15 | 
) 16 | 17 | select 18 | *, 19 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 20 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 21 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 22 | monthname(month) as month_name 23 | from monthly_stats 24 | ``` 25 | 26 | 33 | 34 | 41 | 42 | Jaffle Shop locations served happy customers in . This was a change of from . 43 | 44 | ## Store Openings 45 | 46 | ```store_opening 47 | with 48 | most_recent_open as ( 49 | select 50 | location_name as opened_store, 51 | min(ordered_at) as opened_date_mmmyyyy, 52 | sum(order_total) as opened_revenue_usd 53 | from analytics.orders 54 | group by location_name 55 | order by opened_date_mmmyyyy desc 56 | limit 1 57 | ), 58 | 59 | company_total as ( 60 | select 61 | sum(order_total) as company_revenue_usd, 62 | from analytics.orders 63 | cross join most_recent_open 64 | where ordered_at >= opened_date_mmmyyyy 65 | ) 66 | 67 | select 68 | *, 69 | opened_revenue_usd / company_revenue_usd as revenue_pct 70 | from most_recent_open 71 | cross join company_total 72 | ``` 73 | 74 | The most recent Jaffle Shop store opening was in . Since opening, has contributed of total company sales. 75 | 76 | ```orders_per_week 77 | select 78 | date_trunc('week', ordered_at) as week, 79 | location_name, 80 | count(*) as orders, 81 | sum(order_total) as revenue_usd 82 | 83 | from analytics.orders 84 | 85 | group by 1,2 86 | order by 1 87 | ``` 88 | 89 | 98 | 99 | ```revenue_per_city 100 | select 101 | location_name as city, 102 | concat('/stores/', location_name) as store_link, 103 | count(distinct customer_id) as customers, 104 | count(*) as orders, 105 | sum(order_total) as revenue_usd 106 | 107 | from analytics.orders 108 | 109 | group by 1, 2 110 | ``` 111 | 112 | ## Reports on Individual Stores 113 | Click a row to see the report for that store: 114 | 115 | 116 | ## Seasonality 117 | See [Seasonality Investigation](/analysis/seasonality-investigation) for more information. 118 | 119 | ## Customers 120 | To see individual customer purchase history, see [Customers](/customers) 121 | 122 | ### Customer Cohorts 123 | Average order values are tracked using monthly cohorts, which are created by truncating `first_order_date` to month. 
124 | 125 | ```customers_with_cohort 126 | select 127 | *, 128 | date_trunc('month', first_ordered_at) as cohort_month, 129 | lifetime_spend_pretax / count_lifetime_orders as average_order_value_usd0 130 | 131 | from analytics.customers 132 | ``` 133 | 134 | ```cohorts_aov 135 | select 136 | cohort_month, 137 | avg(average_order_value_usd0) as cohort_aov_usd 138 | 139 | from ${customers_with_cohort} 140 | 141 | group by 1 142 | order by cohort_month 143 | ``` 144 | 145 | 153 | 154 | ### Average Order Values 155 | 156 | 163 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/stores/[city].md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop {$page.params.city} 🥪 2 | 3 | ```opening 4 | select 5 | location_name, 6 | min(month) as opened_month_mmmyyyy 7 | from ${monthly_stats} 8 | group by location_name 9 | order by opened_month_mmmyyyy desc 10 | ``` 11 | 12 | {#if opening[0].location_name === $page.params.city} 13 | 14 | {$page.params.city} is the most recent store opening for Jaffle Shop, opened in d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 15 | 16 | {:else} 17 | 18 | The {$page.params.city} location was opened in d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 19 | 20 | {/if} 21 | 22 | ```monthly_stats 23 | with 24 | monthly_stats as ( 25 | select 26 | date_trunc('month', ordered_at) as month, 27 | location_name, 28 | sum(order_total) as revenue_usd1k, 29 | count(*)::float as orders, 30 | count(distinct customer_id)::float as customers 31 | 32 | from analytics.orders 33 | group by month, location_name 34 | order by month desc 35 | ) 36 | 37 | select 38 | *, 39 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 40 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 41 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 42 | monthname(month) as month_name 43 | from monthly_stats 44 | ``` 45 | 46 | data.location_name === $page.params.city)} 48 | value=revenue_usd1k 49 | comparison=revenue_growth_pct1 50 | title="Monthly Revenue" 51 | comparisonTitle="vs. prev. month" 52 | /> 53 | 54 | data.location_name === $page.params.city)} 56 | value=orders 57 | comparison=order_growth_pct1 58 | title="Monthly Orders" 59 | comparisonTitle="vs. prev. month" 60 | /> 61 | 62 | Jaffle Shop {$page.params.city} served d.location_name === $page.params.city)} column=customers/> happy customers in d.location_name === $page.params.city)} column=month_name/>. This was a change of d.location_name === $page.params.city)} column=customer_growth_pct1/> from d.location_name === $page.params.city)} column=month_name row=1/>. 
63 | 64 | ```orders_per_week 65 | select 66 | location_name as city, 67 | date_trunc('week', ordered_at) as week, 68 | count(*) as orders 69 | 70 | from analytics.orders 71 | 72 | group by 1, 2 73 | order by 1, 2 74 | ``` 75 | 76 | ## Orders Per Week in {$page.params.city} 77 | 78 | <LineChart 79 | data={orders_per_week.filter(data => data.city === $page.params.city)} 80 | x=week 81 | y=orders 82 | yAxisTitle="orders per week in {$page.params.city}" 83 | /> -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/stores/index.md: -------------------------------------------------------------------------------- 1 | # Stores 2 | 3 | ```revenue_per_city 4 | select 5 | location_name as city, 6 | concat('/stores/', location_name) as store_link, 7 | count(distinct customer_id) as customers, 8 | count(*) as orders, 9 | sum(order_total) as revenue_usd 10 | 11 | from analytics.orders 12 | 13 | group by 1, 2 14 | ``` 15 | 16 | Click a row to see the report for that store: 17 | <DataTable data={revenue_per_city} link=store_link/> -------------------------------------------------------------------------------- /test_projects/customer_success/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-postgres==1.6.0-b8 2 | jafgen~=0.3.1 3 | pre-commit~=3.0.4 4 | sqlfluff-templater-dbt~=2.0.0a5 5 | sqlfluff~=2.0.0a5 -------------------------------------------------------------------------------- /test_projects/customer_success/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/snapshots/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/tests/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | *.duckdb 6 | *.duckdb.wal 7 | reports/sources/*.csv 8 | .meltano 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /test_projects/revenue/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: requirements-txt-fixer 9 | - repo: https://github.com/charliermarsh/ruff-pre-commit 10 | rev: v0.0.245 11 | hooks: 12 | - id: ruff 13 | args: [--fix, --exit-non-zero-on-fix] 14 | - repo: https://github.com/pre-commit/mirrors-eslint 15 | rev: v8.34.0 16 | hooks: 17 | - id: eslint 18 | - repo: https://github.com/sqlfluff/sqlfluff 19 | rev: "2.0.0a4" 20 | hooks: 21 | - id: sqlfluff-lint 22 | additional_dependencies: 23 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 24 | - id: sqlfluff-fix 25 | additional_dependencies: 26 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 27 | - repo: https://github.com/psf/black 28 | rev: "23.1.0" 29 | hooks: 30 | - id: black 31 | # - repo: https://github.com/pre-commit/mirrors-prettier 32 | # rev: "" # Use the
sha or tag you want to point at 33 | # hooks: 34 | # - id: prettier 35 | -------------------------------------------------------------------------------- /test_projects/revenue/.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | dialect = duckdb 3 | templater = dbt 4 | runaway_limit = 10 5 | max_line_length = 80 6 | indent_unit = space 7 | 8 | [sqlfluff:indentation] 9 | tab_space_size = 4 10 | 11 | [sqlfluff:layout:type:comma] 12 | spacing_before = touch 13 | line_position = trailing 14 | 15 | [sqlfluff:rules:capitalisation.keywords] 16 | capitalisation_policy = lower 17 | 18 | [sqlfluff:rules:aliasing.table] 19 | aliasing = explicit 20 | 21 | [sqlfluff:rules:aliasing.column] 22 | aliasing = explicit 23 | 24 | [sqlfluff:rules:aliasing.expression] 25 | allow_scalar = False 26 | 27 | [sqlfluff:rules:capitalisation.identifiers] 28 | extended_capitalisation_policy = lower 29 | 30 | [sqlfluff:rules:capitalisation.functions] 31 | capitalisation_policy = lower 32 | 33 | [sqlfluff:rules:capitalisation.literals] 34 | capitalisation_policy = lower 35 | 36 | [sqlfluff:rules:ambiguous.column_references] # Number in group by 37 | group_by_and_order_by_style = implicit 38 | -------------------------------------------------------------------------------- /test_projects/revenue/.sqlfluffignore: -------------------------------------------------------------------------------- 1 | reports 2 | target 3 | dbt_packages 4 | macros 5 | -------------------------------------------------------------------------------- /test_projects/revenue/README.md: -------------------------------------------------------------------------------- 1 | # 🥪 The Jaffle Shop 🦘 2 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/dbt-labs/jaffle-shop-template?quickstart=1) 3 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/dbt-labs/jaffle-shop-template) 4 | 5 | This is a template for creating a fully functional dbt project for teaching, learning, writing, demoing, or any other scenarios where you need a basic project with a synthesized jaffle shop business. We recommend beginners use the following steps to open this project right here on GitHub in a Codespace. If you're a little more experienced with devcontainers and want to go faster 🏎️, you can use the Gitpod link above for a quicker startup and deeper feature set. 6 | 7 | ## How to use 8 | 9 | ### 1. Click the big green 'Use this template' button and 'Create a new repository'. 10 | 11 | ![Click use template](.github/static/use-template.gif) 12 | 13 | This will create a new repository exactly like this one, and navigate you there. Make sure to execute the next instructions in that repo. 14 | 15 | ### 2. Click 'Code', then 'Codespaces', then 'Create codespace on main'. 16 | 17 | ![Create codespace on main](.github/static/open-codespace.gif) 18 | 19 | This will create a new `codespace`, a sandboxed devcontainer with everything you need for a dbt project. Once the codespace is finished setting up, you'll be ready to run a `dbt build`. 20 | 21 | ### 3. Make sure to wait until the codespace is finished setting up. 22 | 23 | ![Codespaces setup screen at postCreateCommand](.github/static/codespaces-setup-screen.png) 24 | 25 | After the container is built and connected to, VSCode will run a few clean-up commands and then a `postCreateCommand`, a set of commands run after the container is set up.
This is where we install our dependencies, such as dbt, the duckdb adapter, and other necessities, as well as run `dbt deps` to install the dbt packages we want to use. That screen will look something like the above. When it's completed it will close and leave you in a fresh terminal prompt. From there you're ready to do some analytics engineering! 26 | 27 | ## Additional included tools 28 | 29 | This template includes two additional tools for the other parts of the stack to create a more realistic experience: 30 | 31 | - BI reporting built with [Evidence](https://evidence.dev) - an open source, code-based BI tool to write reports with markdown and SQL. 32 | - EL with [Meltano](https://meltano.com/) - an open source tool that provides a CLI & version control for ELT pipelines. 33 | 34 | ### Evidence 35 | 36 | With Evidence you can: 37 | 38 | - Version control your BI layer 39 | - Build reports in the same repo as your dbt project 40 | - Deploy your reports to a static site 41 | 42 | #### Running Evidence 43 | 44 | To run Evidence, use: 45 | 46 | ```shell 47 | cd reports 48 | npm run dev 49 | ``` 50 | 51 | See the [Evidence CLI docs](https://docs.evidence.dev/cli) for more details. 52 | 53 | You can make changes to the markdown pages in the `reports/pages` folder and see the reports update in the browser preview. 54 | 55 | #### Learning More about Evidence 56 | 57 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence) 58 | - [Project Home Page](https://www.evidence.dev) 59 | - [Github](https://github.com/evidence-dev/evidence) 60 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases) 61 | 62 | ### Meltano 63 | 64 | This project is preconfigured with Meltano, which can be used to extract and load raw data into DuckDB. 65 | 66 | #### Run EL (Extract and Load) using Meltano 67 | 68 | ```console 69 | meltano run tap-jaffle-shop target-duckdb 70 | ``` 71 | 72 | Optionally, you can modify extract parameters using environment variables. For instance, this modified version will extract five years of data instead of the default one year. 73 | 74 | ```console 75 | export TAP_JAFFLE_SHOP_YEARS=5 76 | meltano run tap-jaffle-shop target-duckdb 77 | ``` 78 | 79 | You can also modify any tap or target config with the interactive `config` command: 80 | 81 | ```console 82 | meltano config tap-jaffle-shop set --interactive 83 | meltano config target-duckdb set --interactive 84 | ``` 85 | 86 | ## Local development 87 | 88 | This project is optimized for running in a container. If you'd like to use it locally outside of a container, you'll need to follow the instructions below. 89 | 90 | 1. Create a python virtual environment and install the dependencies. 91 | 92 | ```console 93 | python3 -m venv .venv 94 | source .venv/bin/activate 95 | pip install -r requirements.txt 96 | ``` 97 | 98 | 2. Install meltano with [pipx](https://pypa.github.io/pipx/installation/), and install meltano's dependencies. 99 | 100 | ```console 101 | pipx install meltano 102 | meltano install 103 | ``` 104 | 105 | 3. Run the EL pipeline. 106 | 107 | ```console 108 | meltano run el 109 | ``` 110 | 111 | 4. Install dbt dependencies and build the dbt project. 112 | 113 | ```console 114 | dbt deps 115 | dbt build 116 | ``` 117 | 118 | 5. Install Evidence dependencies and run the Evidence server. 119 | 120 | ```console 121 | cd reports 122 | npm install 123 | npm run dev 124 | ``` 125 | 126 | ## Contributing 127 | 128 | We welcome issues and PRs requesting or adding new features.
The package that generates the synthetic data, [`jafgen`](https://pypi.org/project/jafgen/), is also under active development, and will add more types of source data to model as we go along. If you have tests, descriptions, new models, metrics, materialization types, or techniques that you use this repo to demonstrate, and that you feel would make for a more expansive baseline experience, we encourage you to contribute them back so that this project becomes an even better collective tool for exploring and learning dbt over time. 129 | -------------------------------------------------------------------------------- /test_projects/revenue/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | tasks: 4 | deps: 5 | cmds: 6 | - python -m pip install --progress-bar off -r requirements.txt 7 | - dbt deps 8 | -------------------------------------------------------------------------------- /test_projects/revenue/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/analyses/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/dbt_loom.config.yml: -------------------------------------------------------------------------------- 1 | manifests: 2 | - name: potato 3 | type: file 4 | optional: true 5 | config: 6 | path: ../customer_success/target/manifest.json 7 | excluded_packages: 8 | - dbt_project_evaluator 9 | -------------------------------------------------------------------------------- /test_projects/revenue/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "revenue" 5 | version: "1.0.0" 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: "revenue" 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | vars: 27 | truncate_timespan_to: "{{ current_timestamp() }}" 28 | 29 | # Configuring models 30 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 31 | 32 | restrict-access: false 33 | 34 | # In this example config, we tell dbt to build staging models as views and marts 35 | # models as tables. These settings can be overridden in the individual model files 36 | # using the `{{ config(...) }}` macro.
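# For instance, a single model file could override the project-level default
# with something like the following (a hypothetical sketch, not a model that
# exists in this project):
#
#   {{ config(materialized='view') }}
#
#   select * from {{ ref('stg_orders') }}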
37 | 38 | models: 39 | revenue: 40 | staging: 41 | +materialized: view 42 | marts: 43 | +materialized: table 44 | dbt_project_evaluator: 45 | +access: private 46 | marts: 47 | dag: 48 | fct_source_fanout: 49 | +enabled: true 50 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_products.csv: -------------------------------------------------------------------------------- 1 | sku,name,type,price,description 2 | JAF-001,nutellaphone who dis?,jaffle,1100,nutella and banana jaffle 3 | JAF-002,doctor stew,jaffle,1100,house-made beef stew jaffle 4 | JAF-003,the krautback,jaffle,1200,lamb and pork bratwurst with house-pickled cabbage sauerkraut and mustard 5 | JAF-004,flame impala,jaffle,1400,"pulled pork and pineapple al pastor marinated in ghost pepper sauce, kevin parker's favorite! " 6 | JAF-005,mel-bun,jaffle,1200,"melon and minced beef bao, in a jaffle, savory and sweet" 7 | BEV-001,tangaroo,beverage,600,mango and tangerine smoothie 8 | BEV-002,chai and mighty,beverage,500,oatmilk chai latte with protein boost 9 | BEV-003,vanilla ice,beverage,600,iced coffee with house-made french vanilla syrup 10 | BEV-004,for richer or pourover ,beverage,700,daily selection of single estate beans for a delicious hot pourover 11 | BEV-005,adele-ade,beverage,400,"a kiwi and lime agua fresca, hello from the other side of thirst" 12 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_stores.csv: -------------------------------------------------------------------------------- 1 | id,name,opened_at,tax_rate 2 | 7f790ed7-0fc4-4de2-a1b0-cce72e657fc4,Philadelphia,2016-09-01T00:00:00,0.06 3 | 08d44615-06d3-4086-a5d7-21395a1d975e,Brooklyn,2017-03-12T00:00:00,0.04 4 | f6f2bd97-becb-4e1c-a611-20c7cf579841,Chicago,2018-04-29T00:00:00,0.0625 5 | 48b0172c-4490-4f05-b290-e69f418d0575,San Francisco,2018-05-09T00:00:00,0.075 6 | ed2af26d-35a1-4a31-ac65-7aedcaa7b7a7,New Orleans,2019-03-10T00:00:00,0.04 7 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_supplies.csv: -------------------------------------------------------------------------------- 1 | id,name,cost,perishable,sku 2 | SUP-001,compostable cutlery - knife,7,False,JAF-001 3 | SUP-002,cutlery - fork,7,False,JAF-001 4 | SUP-003,serving boat,11,False,JAF-001 5 | SUP-004,napkin,4,False,JAF-001 6 | SUP-009,bread,33,True,JAF-001 7 | SUP-011,nutella,46,True,JAF-001 8 | SUP-012,banana,13,True,JAF-001 9 | SUP-001,compostable cutlery - knife,7,False,JAF-002 10 | SUP-002,cutlery - fork,7,False,JAF-002 11 | SUP-003,serving boat,11,False,JAF-002 12 | SUP-004,napkin,4,False,JAF-002 13 | SUP-009,bread,33,True,JAF-002 14 | SUP-010,cheese,20,True,JAF-002 15 | SUP-013,beef stew,169,True,JAF-002 16 | SUP-001,compostable cutlery - knife,7,False,JAF-003 17 | SUP-002,cutlery - fork,7,False,JAF-003 18 | SUP-003,serving boat,11,False,JAF-003 19 | SUP-004,napkin,4,False,JAF-003 20 | SUP-009,bread,33,True,JAF-003 21 | SUP-010,cheese,20,True,JAF-003 22 | SUP-014,lamb and pork bratwurst,234,True,JAF-003 23 | SUP-015,house-pickled cabbage sauerkraut,43,True,JAF-003 24 | SUP-016,mustard,7,True,JAF-003 25 | SUP-001,compostable cutlery - knife,7,False,JAF-004 26 | SUP-002,cutlery - fork,7,False,JAF-004 27 | SUP-003,serving boat,11,False,JAF-004 28 | SUP-004,napkin,4,False,JAF-004 29 | SUP-009,bread,33,True,JAF-004 30 | SUP-010,cheese,20,True,JAF-004 31 | SUP-017,pulled 
pork,215,True,JAF-004 32 | SUP-018,pineapple,26,True,JAF-004 33 | SUP-021,ghost pepper sauce,20,True,JAF-004 34 | SUP-001,compostable cutlery - knife,7,False,JAF-005 35 | SUP-002,cutlery - fork,7,False,JAF-005 36 | SUP-003,serving boat,11,False,JAF-005 37 | SUP-004,napkin,4,False,JAF-005 38 | SUP-009,bread,33,True,JAF-005 39 | SUP-010,cheese,20,True,JAF-005 40 | SUP-019,melon,33,True,JAF-005 41 | SUP-020,minced beef,124,True,JAF-005 42 | SUP-005,16oz compostable clear cup,13,False,BEV-001 43 | SUP-006,16oz compostable clear lid,4,False,BEV-001 44 | SUP-007,biodegradable straw,13,False,BEV-001 45 | SUP-022,mango,32,True,BEV-001 46 | SUP-023,tangerine,20,True,BEV-001 47 | SUP-005,16oz compostable clear cup,13,False,BEV-002 48 | SUP-006,16oz compostable clear lid,4,False,BEV-002 49 | SUP-007,biodegradable straw,13,False,BEV-002 50 | SUP-008,chai mix,98,True,BEV-002 51 | SUP-024,oatmilk,11,True,BEV-002 52 | SUP-025,whey protein,36,True,BEV-002 53 | SUP-005,16oz compostable clear cup,13,False,BEV-003 54 | SUP-006,16oz compostable clear lid,4,False,BEV-003 55 | SUP-007,biodegradable straw,13,False,BEV-003 56 | SUP-026,coffee,52,True,BEV-003 57 | SUP-027,french vanilla syrup,72,True,BEV-003 58 | SUP-005,16oz compostable clear cup,13,False,BEV-004 59 | SUP-006,16oz compostable clear lid,4,False,BEV-004 60 | SUP-007,biodegradable straw,13,False,BEV-004 61 | SUP-026,coffee,52,True,BEV-004 62 | SUP-005,16oz compostable clear cup,13,False,BEV-005 63 | SUP-006,16oz compostable clear lid,4,False,BEV-005 64 | SUP-007,biodegradable straw,13,False,BEV-005 65 | SUP-028,kiwi,20,True,BEV-005 66 | SUP-029,lime,13,True,BEV-005 67 | -------------------------------------------------------------------------------- /test_projects/revenue/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/macros/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/macros/cents_to_dollars.sql: -------------------------------------------------------------------------------- 1 | {# A basic example for a project-wide macro to cast a column uniformly #} 2 | 3 | {% macro cents_to_dollars(column_name, precision=2) -%} 4 | ({{ column_name }} / 100)::numeric(16, {{ precision }}) 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /test_projects/revenue/meltano.yml: -------------------------------------------------------------------------------- 1 | # Meltano Configuration File 2 | # 3 | # Sample usage: 4 | # > meltano run tap-jaffle-shop target-duckdb 5 | # 6 | # Or equivalently: 7 | # > meltano run el # Run the job named 'el' to extract and load data 8 | 9 | version: 1 10 | project_id: Jaffle Shop Template Project 11 | 12 | env: 13 | JAFFLE_DB_NAME: jaffle_shop 14 | JAFFLE_RAW_SCHEMA: jaffle_raw 15 | 16 | default_environment: dev 17 | environments: 18 | - name: dev 19 | 20 | plugins: 21 | extractors: 22 | - name: tap-jaffle-shop 23 | namespace: tap_jaffle_shop 24 | variant: meltanolabs 25 | pip_url: git+https://github.com/MeltanoLabs/tap-jaffle-shop.git@v0.3.0 26 | capabilities: 27 | - catalog 28 | - discover 29 | config: 30 | years: 2 31 | stream_name_prefix: ${JAFFLE_RAW_SCHEMA}-raw_ 32 | loaders: 33 | - name: target-postgres 34 | variant: datamill-co 35 | pip_url: git+https://github.com/datamill-co/target-postgres.git@v0.1.0 36 | config: 37 | host: 127.0.0.1 38 | 
user: postgres 39 | password: ${SNOWFLAKE_PASSWORD} 40 | default_target_schema: $JAFFLE_RAW_SCHEMA 41 | 42 | jobs: 43 | # Sample usage: `meltano run el` 44 | # Equivalent to: `meltano run tap-jaffle-shop target-postgres` 45 | - name: el # Extract and load the raw data 46 | tasks: 47 | - tap-jaffle-shop target-postgres 48 | -------------------------------------------------------------------------------- /test_projects/revenue/models/groups.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: sales 3 | owner: 4 | email: sales@example.com 5 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: orders 5 | description: > 6 | Order overview data mart, offering key details for each order including if it's a customer's 7 | first order and a food vs. drink item breakdown. One row per order. 8 | access: public 9 | tests: 10 | - dbt_utils.expression_is_true: 11 | expression: "count_food_items + count_drink_items = count_items" 12 | - dbt_utils.expression_is_true: 13 | expression: "subtotal_food_items + subtotal_drink_items = subtotal" 14 | 15 | columns: 16 | - name: order_id 17 | description: The unique key of the orders mart. 18 | tests: 19 | - not_null 20 | - unique 21 | - name: customer_id 22 | description: The foreign key relating to the customer who placed the order. 23 | - name: location_id 24 | description: The foreign key relating to the location the order was placed at. 25 | - name: order_total 26 | description: The total amount of the order in USD including tax. 27 | - name: ordered_at 28 | description: The timestamp the order was placed at. 29 | - name: count_food_items 30 | description: The number of individual food items ordered. 31 | - name: count_drink_items 32 | description: The number of individual drink items ordered. 33 | - name: count_items 34 | description: The total number of both food and drink items ordered. 35 | - name: subtotal_food_items 36 | description: The sum of all the food item prices without tax. 37 | - name: subtotal_drink_items 38 | description: The sum of all the drink item prices without tax. 39 | - name: subtotal 40 | description: The sum total of both food and drink item prices without tax. 41 | - name: order_cost 42 | description: The sum of supply expenses to fulfill the order. 43 | - name: location_name 44 | description: > 45 | The full location name of where this order was placed. Denormalized from `stg_locations`. 46 | - name: is_first_order 47 | description: > 48 | A boolean indicating if this order is from a new customer placing their first order. 49 | - name: is_food_order 50 | description: A boolean indicating if this order included any food items. 51 | - name: is_drink_order 52 | description: A boolean indicating if this order included any drink items. 53 | 54 | latest_version: 2 55 | versions: 56 | - v: 1 57 | deprecation_date: "2024-01-01" 58 | 59 | - v: 2 60 | columns: 61 | - include: all 62 | exclude: [location_id] 63 | 64 | - name: accounts 65 | description: > 66 | All accounts with whom we have done business. This is a very sensitive asset. 67 | access: private 68 | group: sales 69 | 70 | columns: 71 | - name: name 72 | description: Name of the account.
73 | tests: 74 | - not_null 75 | - unique 76 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/accounts.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | final as ( 4 | select name from {{ ref('stg_accounts') }} 5 | ) 6 | 7 | 8 | select * from final 9 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/orders_v1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'order_id' 5 | ) 6 | }} 7 | 8 | with 9 | 10 | {# 11 | DuckDB will see {{ this }} evaluate to `orders` and a CTE called `orders` as being the same 12 | so when using DuckDB we append `_set` to any CTEs with the same name as {{ this }} to indicate 13 | we're not executing a recursive statement 14 | #} 15 | 16 | orders_set as ( 17 | 18 | select * from {{ ref('stg_orders') }} 19 | 20 | where 21 | true 22 | 23 | {% if is_incremental() %} 24 | 25 | and ordered_at >= ( 26 | select max(ordered_at) as most_recent_record from {{ this }} 27 | ) 28 | 29 | {% endif %} 30 | 31 | ), 32 | 33 | order_items as ( 34 | 35 | select * from {{ ref('stg_order_items') }} 36 | 37 | ), 38 | 39 | products as ( 40 | 41 | select * from {{ ref('stg_products') }} 42 | 43 | ), 44 | 45 | locations as ( 46 | 47 | select * from {{ ref('stg_locations') }} 48 | 49 | ), 50 | 51 | supplies as ( 52 | 53 | select * from {{ ref('stg_supplies') }} 54 | 55 | ), 56 | 57 | order_items_summary as ( 58 | 59 | select 60 | 61 | order_items.order_id, 62 | 63 | sum(products.is_food_item) as count_food_items, 64 | sum(products.is_drink_item) as count_drink_items, 65 | count(*) as count_items, 66 | sum( 67 | case 68 | when products.is_food_item = 1 then products.product_price 69 | else 0 70 | end 71 | ) as subtotal_food_items, 72 | sum( 73 | case 74 | when products.is_drink_item = 1 then products.product_price 75 | else 0 76 | end 77 | ) as subtotal_drink_items, 78 | sum(products.product_price) as subtotal 79 | 80 | from order_items 81 | 82 | left join products on order_items.product_id = products.product_id 83 | 84 | group by 1 85 | 86 | ), 87 | 88 | order_supplies_summary as ( 89 | 90 | select 91 | 92 | order_items.order_id, 93 | 94 | sum(supplies.supply_cost) as order_cost 95 | 96 | from order_items 97 | 98 | left join supplies on order_items.product_id = supplies.product_id 99 | 100 | group by 1 101 | 102 | ), 103 | 104 | joined as ( 105 | 106 | select 107 | 108 | orders_set.*, 109 | 110 | order_items_summary.count_food_items, 111 | order_items_summary.count_drink_items, 112 | order_items_summary.count_items, 113 | 114 | order_items_summary.subtotal_drink_items, 115 | order_items_summary.subtotal_food_items, 116 | order_items_summary.subtotal, 117 | 118 | order_supplies_summary.order_cost, 119 | locations.location_name 120 | 121 | from orders_set 122 | 123 | left join order_items_summary 124 | on orders_set.order_id = order_items_summary.order_id 125 | left join order_supplies_summary 126 | on orders_set.order_id = order_supplies_summary.order_id 127 | left join locations 128 | on orders_set.location_id = locations.location_id 129 | 130 | ), 131 | 132 | final as ( 133 | 134 | select 135 | 136 | *, 137 | count_food_items > 0 as is_food_order, 138 | count_drink_items > 0 as is_drink_order 139 | 140 | from joined 141 | 142 | ) 143 | 144 | select * from final 145 |
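-- Note: on an incremental run, the is_incremental() block above renders a
-- filter against the already-built table. Roughly (a sketch; the actual
-- database, schema, and relation name depend on the configured target):
--
--   select * from stg_orders
--   where true
--     and ordered_at >= (
--         select max(ordered_at) as most_recent_record from analytics.orders_v1
--     )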
-------------------------------------------------------------------------------- /test_projects/revenue/models/marts/orders_v2.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'order_id' 5 | ) 6 | }} 7 | 8 | with 9 | 10 | {# 11 | DuckDB will see {{ this }} evaluate to `orders` and a CTE called `orders` as being the same 12 | so when using DuckDB we append `_set` to any CTEs with the same name as {{ this }} to indicate 13 | we're not executing a recursive statement 14 | #} 15 | 16 | orders_set as ( 17 | 18 | select * from {{ ref('stg_orders') }} 19 | 20 | where 21 | true 22 | 23 | {% if is_incremental() %} 24 | 25 | and ordered_at >= ( 26 | select max(ordered_at) as most_recent_record from {{ this }} 27 | ) 28 | 29 | {% endif %} 30 | 31 | ), 32 | 33 | order_items as ( 34 | 35 | select * from {{ ref('stg_order_items') }} 36 | 37 | ), 38 | 39 | products as ( 40 | 41 | select * from {{ ref('stg_products') }} 42 | 43 | ), 44 | 45 | locations as ( 46 | 47 | select * from {{ ref('stg_locations') }} 48 | 49 | ), 50 | 51 | supplies as ( 52 | 53 | select * from {{ ref('stg_supplies') }} 54 | 55 | ), 56 | 57 | accounts as ( 58 | select * from {{ ref('stg_accounts') }} 59 | ), 60 | 61 | order_items_summary as ( 62 | 63 | select 64 | 65 | order_items.order_id, 66 | 67 | sum(products.is_food_item) as count_food_items, 68 | sum(products.is_drink_item) as count_drink_items, 69 | count(*) as count_items, 70 | sum( 71 | case 72 | when products.is_food_item = 1 then products.product_price 73 | else 0 74 | end 75 | ) as subtotal_food_items, 76 | sum( 77 | case 78 | when products.is_drink_item = 1 then products.product_price 79 | else 0 80 | end 81 | ) as subtotal_drink_items, 82 | sum(products.product_price) as subtotal 83 | 84 | from order_items 85 | 86 | left join products on order_items.product_id = products.product_id 87 | 88 | group by 1 89 | 90 | ), 91 | 92 | order_supplies_summary as ( 93 | 94 | select 95 | 96 | order_items.order_id, 97 | 98 | sum(supplies.supply_cost) as order_cost 99 | 100 | from order_items 101 | 102 | left join supplies on order_items.product_id = supplies.product_id 103 | 104 | group by 1 105 | 106 | ), 107 | 108 | joined as ( 109 | 110 | select 111 | 112 | orders_set.* exclude location_id, 113 | 114 | order_items_summary.count_food_items, 115 | order_items_summary.count_drink_items, 116 | order_items_summary.count_items, 117 | 118 | order_items_summary.subtotal_drink_items, 119 | order_items_summary.subtotal_food_items, 120 | order_items_summary.subtotal, 121 | 122 | order_supplies_summary.order_cost, 123 | locations.location_name 124 | 125 | from orders_set 126 | 127 | left join order_items_summary 128 | on orders_set.order_id = order_items_summary.order_id 129 | left join order_supplies_summary 130 | on orders_set.order_id = order_supplies_summary.order_id 131 | left join locations 132 | on orders_set.location_id = locations.location_id 133 | 134 | ), 135 | 136 | final as ( 137 | 138 | select 139 | 140 | *, 141 | count_food_items > 0 as is_food_order, 142 | count_drink_items > 0 as is_drink_order 143 | 144 | from joined 145 | 146 | ) 147 | 148 | select * from final 149 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_locations 5 | description: List of open
locations with basic cleaning and transformation applied, one row per location. 6 | columns: 7 | - name: location_id 8 | description: The unique key for each location. 9 | tests: 10 | - not_null 11 | - unique 12 | 13 | - name: stg_order_items 14 | description: Individual food and drink items that make up our orders, one row per item. 15 | columns: 16 | - name: order_item_id 17 | description: The unique key for each order item. 18 | tests: 19 | - not_null 20 | - unique 21 | 22 | - name: stg_orders 23 | description: Order data with basic cleaning and transformation applied, one row per order. 24 | columns: 25 | - name: order_id 26 | description: The unique key for each order. 27 | tests: 28 | - not_null 29 | - unique 30 | 31 | - name: stg_products 32 | description: Product (food and drink items that can be ordered) data with basic cleaning and transformation applied, one row per product. 33 | columns: 34 | - name: product_id 35 | description: The unique key for each product. 36 | tests: 37 | - not_null 38 | - unique 39 | 40 | - name: stg_supplies 41 | description: > 42 | List of our supply expenses data with basic cleaning and transformation applied. 43 | 44 | One row per supply cost, not per supply. As supply costs fluctuate they receive a new row with a new UUID. Thus there can be multiple rows per supply_id. 45 | columns: 46 | - name: supply_uuid 47 | description: The unique key of our supplies per cost. 48 | tests: 49 | - not_null 50 | - unique 51 | 52 | - name: stg_accounts 53 | description: > 54 | List of all accounts. 55 | columns: 56 | - name: name 57 | description: The unique key of our accounts. 58 | tests: 59 | - not_null 60 | - unique 61 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: ecom 5 | schema: raw 6 | description: E-commerce data 7 | tables: 8 | - name: raw_orders 9 | meta: 10 | external_location: "read_csv('jaffle-data/raw_orders.csv',AUTO_DETECT=TRUE)" 11 | description: One record per order (consisting of one or more order items) 12 | - name: raw_items 13 | meta: 14 | external_location: "read_csv('jaffle-data/raw_items.csv', names=['id', 'order_id', 'sku'], AUTO_DETECT=TRUE)" 15 | description: Items included in an order 16 | - name: raw_stores 17 | meta: 18 | external_location: "read_csv('jaffle-data/raw_stores.csv',AUTO_DETECT=TRUE)" 19 | description: One record per physical store location 20 | - name: raw_products 21 | meta: 22 | external_location: "read_csv('jaffle-data/raw_products.csv',AUTO_DETECT=TRUE)" 23 | description: One record per SKU for items sold in stores 24 | - name: raw_supplies 25 | meta: 26 | external_location: "read_csv('jaffle-data/raw_supplies.csv',AUTO_DETECT=TRUE)" 27 | description: One record per supply per SKU of items sold in stores 28 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_accounts.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('seed_accounts') }} -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_locations.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_stores') }} 7 | 8 | {# data runs to 2026, 
truncate timespan to desired range, 9 | current time as default #} 10 | where opened_at::timestamptz <= {{ var('truncate_timespan_to') }} 11 | 12 | ), 13 | 14 | renamed as ( 15 | 16 | select 17 | 18 | ---------- ids 19 | id as location_id, 20 | 21 | ---------- properties 22 | name as location_name, 23 | tax_rate, 24 | 25 | ---------- timestamp 26 | opened_at 27 | 28 | from source 29 | 30 | ) 31 | 32 | select * from renamed 33 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_order_items.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_items') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | id as order_item_id, 16 | order_id, 17 | 18 | ---------- properties 19 | sku as product_id 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table' 4 | ) 5 | }} 6 | 7 | with 8 | 9 | source as ( 10 | 11 | select * from {{ source('ecom', 'raw_orders') }} 12 | 13 | -- data runs to 2026, truncate timespan to desired range, 14 | -- current time as default 15 | where ordered_at::timestamptz <= {{ var('truncate_timespan_to') }} 16 | 17 | ), 18 | 19 | renamed as ( 20 | 21 | select 22 | 23 | ---------- ids 24 | id as order_id, 25 | store_id as location_id, 26 | customer as customer_id, 27 | 28 | ---------- properties 29 | (order_total / 100.0)::float as order_total, 30 | (tax_paid / 100.0)::float as tax_paid, 31 | 32 | ---------- timestamps 33 | ordered_at 34 | 35 | from source 36 | 37 | ) 38 | 39 | select * from renamed 40 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_products.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_products') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | sku as product_id, 16 | 17 | ---------- properties 18 | name as product_name, 19 | type as product_type, 20 | description as product_description, 21 | (price / 100.0)::float as product_price, 22 | 23 | 24 | ---------- derived 25 | case 26 | when type = 'jaffle' then 1 27 | else 0 28 | end as is_food_item, 29 | 30 | case 31 | when type = 'beverage' then 1 32 | else 0 33 | end as is_drink_item 34 | 35 | from source 36 | 37 | ) 38 | 39 | select * from renamed 40 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_supplies.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_supplies') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | {{ dbt_utils.generate_surrogate_key(['id', 'sku']) }} as supply_uuid, 16 | id as supply_id, 17 | sku as product_id, 18 | 19 | ---------- properties 20 | name as supply_name, 21 | (cost / 100.0)::float as supply_cost, 22 | perishable as is_perishable_supply 23 | 24 | from source 25 | 26 | ) 27 | 28 | select * from renamed 29 | 
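-- For reference, {{ dbt_utils.generate_surrogate_key(['id', 'sku']) }} above
-- compiles to an md5 hash over a null-safe, delimited concatenation of the
-- columns. Approximately (a sketch of the rendered SQL, not project code):
--
--   md5(
--       coalesce(cast(id as varchar), '_dbt_utils_surrogate_key_null_')
--       || '-' || coalesce(cast(sku as varchar), '_dbt_utils_surrogate_key_null_')
--   ) as supply_uuid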
-------------------------------------------------------------------------------- /test_projects/revenue/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | - package: dbt-labs/dbt_project_evaluator 5 | version: 0.14.3 6 | sha1_hash: 52459ce227fef835e4466cbb12d624b3e1971fae 7 | -------------------------------------------------------------------------------- /test_projects/revenue/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | - package: dbt-labs/dbt_project_evaluator 5 | version: 0.14.3 6 | -------------------------------------------------------------------------------- /test_projects/revenue/profiles.yml: -------------------------------------------------------------------------------- 1 | revenue: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ../database.db 6 | threads: 4 7 | target: dev 8 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/.evidence/customization/custom-formatting.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "customFormats": [] 4 | } -------------------------------------------------------------------------------- /test_projects/revenue/reports/.gitignore: -------------------------------------------------------------------------------- 1 | .evidence/template 2 | .svelte-kit 3 | build 4 | node_modules 5 | .DS_Store 6 | *.*duckdb 7 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/README.md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop BI Reports 2 | 3 | This project uses [Evidence.dev](https://Evidence.dev) for BI reporting. 4 | 5 | ## Getting Started 6 | 7 | Run the BI server from your local workstation: 8 | 9 | ```shell 10 | cd reports 11 | npm run dev 12 | ``` 13 | 14 | This will launch the Evidence webserver in developer mode. 15 | 16 | ## Testing for breakages 17 | 18 | The following command can be used to confirm that reports and queries are still valid: 19 | 20 | ```console 21 | npm run build:strict 22 | ``` 23 | 24 | ## Updating to the latest version of Evidence 25 | 26 | 1. Check your version against the version number of the [latest release](https://github.com/evidence-dev/evidence/releases). 27 | 2. Run `npm install @evidence-dev/evidence@latest` to bump the version in `package.json` and automatically update dependencies in `package-lock.json`.
28 | 29 | ## Learning More 30 | 31 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence) 32 | - [Project Home Page](https://www.evidence.dev) 33 | - [Github](https://github.com/evidence-dev/evidence) 34 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases) 35 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jaffle-shop", 3 | "version": "0.0.1", 4 | "scripts": { 5 | "build": "evidence build", 6 | "build:strict": "evidence build:strict", 7 | "dev": "evidence dev --host 0.0.0.0 --open /", 8 | "test": "evidence build", 9 | "help": "evidence --help" 10 | }, 11 | "engines": { 12 | "npm": ">=7.0.0", 13 | "node": ">=16.14.0" 14 | }, 15 | "type": "module", 16 | "dependencies": { 17 | "@evidence-dev/evidence": "15.0.1", 18 | "@evidence-dev/preprocess": "2.2.0", 19 | "@evidence-dev/components": "2.2.1" 20 | }, 21 | "overrides": { 22 | "jsonwebtoken": "9.0.0", 23 | "trim@<0.0.3": ">0.0.3", 24 | "sqlite3": "5.1.5" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/analysis/seasonality-investigation.md: -------------------------------------------------------------------------------- 1 | # Seasonality Investigation 2 | *Written by Melissa Cranston in September 2017* 3 | 4 | *Analysis covers the time period of September 2016 to August 2017. All queries have been limited to that range.* 5 | 6 | [Jump to conclusions & recommendations ↓](#conclusions) 7 | 8 | ## Variations in Order Volume 9 | Plotting orders per day for the last 12 months reveals 3 things: 10 | - An unnaturally large jump in orders per day in March 2017 - this was driven by the new store opening in [Brooklyn](/stores/Brooklyn) 11 | - A repeating pattern of spikes which might be driven by different order volumes on specific days of the week 12 | - A drop in total orders per day around June 2017 13 | 14 | ```orders_per_day 15 | select 16 | date_trunc('day', ordered_at) as date, 17 | count(*) as orders 18 | 19 | from analytics.orders 20 | where ordered_at between '2016-09-01' and '2017-08-31' 21 | 22 | group by 1 23 | order by 1 24 | ``` 25 | 26 | <LineChart 27 | data={orders_per_day} 28 | x=date 29 | y=orders 30 | xAxisTitle=true 31 | yAxisTitle=true 32 | /> 33 | 34 | ## Day of Week 35 | We can calculate average orders by day of week to check if there are differences in order volume across days. 36 | 37 | ```orders_by_weekday 38 | select 39 | date_part('dayofweek', date) as day_of_week_num, 40 | dayname(date) as day_of_week, 41 | avg(orders) as avg_orders 42 | from ${orders_per_day} 43 | group by 1, 2 44 | order by day_of_week_num 45 | ``` 46 | 47 | <BarChart 48 | data={orders_by_weekday} 49 | x=day_of_week 50 | y=avg_orders 51 | sort=false 52 | xAxisTitle=true 53 | yAxisTitle=true 54 | /> 55 | 56 | This reveals that weekdays generate significantly higher order volume than weekends. It also shows that orders are fairly consistent across individual days on weekdays (202-209 orders/day) and weekends (~50 orders/day). 57 | 58 | ## Hour of Day 59 | Now we'll break down orders by hour of day to see if there are patterns within days. Given the differences we just found between weekday and weekend volumes, we should split the results by those day types. We can use a loop for this.
60 | 61 | ```orders_hour_of_day 62 | with 63 | orders_by_hour as ( 64 | select 65 | date_part('hour', ordered_at) as hour_of_day, 66 | if(dayname(ordered_at) in ('Sunday', 'Saturday'), 'Weekend', 'Weekday') as day_type, 67 | count(*)::float as orders, 68 | count(distinct date_trunc('day', ordered_at)) as days 69 | from analytics.orders 70 | where ordered_at between '2016-09-01' and '2017-08-31' 71 | group by 1, 2 72 | order by hour_of_day 73 | ) 74 | 75 | select 76 | *, 77 | orders / days as orders_per_hour 78 | from orders_by_hour 79 | ``` 80 | 81 | {#each ['Weekday', 'Weekend'] as day_type} 82 | 83 | <LineChart 84 | data={orders_hour_of_day.filter(d => d.day_type === day_type)} 85 | x=hour_of_day 86 | y=orders_per_hour 87 | yAxisTitle=true 88 | xAxisTitle=true 89 | yMax=60 90 | title="{day_type} - Orders by Hour of Day" 91 | /> 92 | 93 | {/each} 94 | 95 | We see a significant peak in order volume between 7 and 9am on weekdays. There is also a slight increase in volume around lunch times (12-2pm) across all days of the week. 96 | 97 | ## Dayparts 98 | Based on the volumes shown above, we can break down our dayparts as: 99 | - Breakfast: 7-9am 100 | - Late Morning: 9am-12pm 101 | - Lunch: 12-2pm 102 | - Late Afternoon: 2-5pm 103 | 104 | In future analyses, these timeframes should be lined up with any existing operational timeframes (e.g., breakfast, lunch service windows). 105 | 106 | ```dayparts 107 | with 108 | orders_add_daypart as ( 109 | select 110 | *, 111 | case 112 | when hour_of_day between 7 and 8 then 'Breakfast' 113 | when hour_of_day between 9 and 11 then 'Late Morning' 114 | when hour_of_day between 12 and 14 then 'Lunch' 115 | when hour_of_day between 15 and 24 then 'Late Afternoon' 116 | end as daypart 117 | from ${orders_hour_of_day} 118 | ), 119 | 120 | orders_by_daypart as ( 121 | select 122 | daypart, 123 | day_type, 124 | sum(orders) / sum(days) as orders_per_hour, 125 | sum(orders) as orders 126 | from orders_add_daypart 127 | group by daypart, day_type 128 | ) 129 | 130 | select 131 | *, 132 | orders / sum(orders) over () as orders_pct1 133 | from orders_by_daypart 134 | ``` 135 | 136 | <BarChart 137 | data={dayparts} 138 | x=daypart 139 | y=orders_pct1 140 | series=day_type 141 | swapXY=true 142 | xAxisTitle=true 143 | yAxisTitle=true 144 | /> 145 | 146 | Almost half of all orders are generated from breakfast on weekdays. This might be driven by orders from customers who are on their way to work - a follow-up analysis on customer purchasing behaviour should be completed to investigate this.
147 | 148 | ## Conclusions 149 | - Weekdays generate significantly more orders than weekend days (~4x more orders on an average weekday compared to an average weekend day) 150 | - Early mornings (7-9am) on weekdays generate almost half of all orders for the company 151 | - There was a drop in orders in June 2017 - this has not been covered in this analysis, but should be investigated 152 | 153 | ### Recommended Follow-on Analyses 154 | - Investigate drop in orders in June 2017 155 | - Study customer purchasing behaviour, especially during weekday early mornings 156 | - Extend this analysis with a longer timeframe to investigate seasonality throughout the calendar year 157 | 158 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/customers/[customer].md: -------------------------------------------------------------------------------- 1 | # {$page.params.customer}'s Customer Profile 2 | 3 | ```customers 4 | select 5 | *, 6 | first_ordered_at as first_order_longdate, 7 | last_ordered_at as last_order_longdate, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | ``` 12 | 13 | {$page.params.customer} has been a customer since <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=first_order_longdate/>, with their most recent order occurring on <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=last_order_longdate/>. 14 | 15 | ### Key stats: 16 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=count_lifetime_orders/> lifetime orders 17 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=lifetime_spend_usd/> in lifetime spend 18 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=average_order_value_usd/> average order value 19 | 20 | ```monthly_purchases 21 | select 22 | date_trunc('month', a.ordered_at) as month, 23 | b.customer_name, 24 | sum(a.order_total) as purchases_usd 25 | from analytics.orders a 26 | left join analytics.customers b 27 | on a.customer_id = b.customer_id 28 | group by month, customer_name 29 | order by month asc 30 | ``` 31 | 32 | <LineChart 33 | data={monthly_purchases.filter(d => d.customer_name === $page.params.customer)} 34 | x=month 35 | y=purchases_usd 36 | title="Purchases per Month by {$page.params.customer}" 37 | /> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/customers/index.md: -------------------------------------------------------------------------------- 1 | # Customers 2 | 3 | ```customers 4 | select 5 | customer_name, 6 | concat('/customers/', customer_name) as customer_link, 7 | count_lifetime_orders as lifetime_orders, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | order by lifetime_spend_usd desc 12 | ``` 13 | 14 | Click a row to see the report for that customer: 15 | <DataTable data={customers} link=customer_link> 16 | <Column id=customer_name/> 17 | <Column id=lifetime_orders/> 18 | <Column id=lifetime_spend_usd/> 19 | <Column id=average_order_value_usd/> 20 | </DataTable> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Jaffle Shop 🥪 2 | 3 | ```monthly_stats 4 | with 5 | monthly_stats as ( 6 | select 7 | date_trunc('month', ordered_at) as month, 8 | sum(order_total) as revenue_usd1k, 9 | count(*)::float as orders, 10 | count(distinct customer_id)::float as customers 11 | 12 | from analytics.orders 13 | group by month 14 | order by month desc 15 | ) 16 | 17 | select 18 | *,
19 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 20 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 21 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 22 | monthname(month) as month_name 23 | from monthly_stats 24 | ``` 25 | 26 | <BigValue 27 | data={monthly_stats} 28 | value=revenue_usd1k 29 | comparison=revenue_growth_pct1 30 | title="Monthly Revenue" 31 | comparisonTitle="vs. prev. month" 32 | /> 33 | 34 | <BigValue 35 | data={monthly_stats} 36 | value=orders 37 | comparison=order_growth_pct1 38 | title="Monthly Orders" 39 | comparisonTitle="vs. prev. month" 40 | /> 41 | 42 | Jaffle Shop locations served <Value data={monthly_stats} column=customers/> happy customers in <Value data={monthly_stats} column=month_name/>. This was a change of <Value data={monthly_stats} column=customer_growth_pct1/> from <Value data={monthly_stats} column=month_name row=1/>. 43 | 44 | ## Store Openings 45 | 46 | ```store_opening 47 | with 48 | most_recent_open as ( 49 | select 50 | location_name as opened_store, 51 | min(ordered_at) as opened_date_mmmyyyy, 52 | sum(order_total) as opened_revenue_usd 53 | from analytics.orders 54 | group by location_name 55 | order by opened_date_mmmyyyy desc 56 | limit 1 57 | ), 58 | 59 | company_total as ( 60 | select 61 | sum(order_total) as company_revenue_usd, 62 | from analytics.orders 63 | cross join most_recent_open 64 | where ordered_at >= opened_date_mmmyyyy 65 | ) 66 | 67 | select 68 | *, 69 | opened_revenue_usd / company_revenue_usd as revenue_pct 70 | from most_recent_open 71 | cross join company_total 72 | ``` 73 | 74 | The most recent Jaffle Shop store opening was in <Value data={store_opening} column=opened_date_mmmyyyy/>. Since opening, <Value data={store_opening} column=opened_store/> has contributed <Value data={store_opening} column=revenue_pct/> of total company sales. 75 | 76 | ```orders_per_week 77 | select 78 | date_trunc('week', ordered_at) as week, 79 | location_name, 80 | count(*) as orders, 81 | sum(order_total) as revenue_usd 82 | 83 | from analytics.orders 84 | 85 | group by 1,2 86 | order by 1 87 | ``` 88 | 89 | <LineChart 90 | data={orders_per_week} 91 | x=week 92 | y=orders 93 | series=location_name 94 | xAxisTitle=true 95 | yAxisTitle="orders per week" 96 | title="Orders Per Week by Location" 97 | /> 98 | 99 | ```revenue_per_city 100 | select 101 | location_name as city, 102 | concat('/stores/', location_name) as store_link, 103 | count(distinct customer_id) as customers, 104 | count(*) as orders, 105 | sum(order_total) as revenue_usd 106 | 107 | from analytics.orders 108 | 109 | group by 1, 2 110 | ``` 111 | 112 | ## Reports on Individual Stores 113 | Click a row to see the report for that store: 114 | 115 | <DataTable data={revenue_per_city} link=store_link/> 116 | ## Seasonality 117 | See [Seasonality Investigation](/analysis/seasonality-investigation) for more information. 118 | 119 | ## Customers 120 | To see individual customer purchase history, see [Customers](/customers) 121 | 122 | ### Customer Cohorts 123 | Average order values are tracked using monthly cohorts, which are created by truncating `first_order_date` to month. 124 | 125 | ```customers_with_cohort 126 | select 127 | *, 128 | date_trunc('month', first_ordered_at) as cohort_month, 129 | lifetime_spend_pretax / count_lifetime_orders as average_order_value_usd0 130 | 131 | from analytics.customers 132 | ``` 133 | 134 | ```cohorts_aov 135 | select 136 | cohort_month, 137 | avg(average_order_value_usd0) as cohort_aov_usd 138 | 139 | from ${customers_with_cohort} 140 | 141 | group by 1 142 | order by cohort_month 143 | ``` 144 | 145 | <BarChart 146 | data={cohorts_aov} 147 | x=cohort_month 148 | y=cohort_aov_usd 149 | xAxisTitle=true 150 | yAxisTitle=true 151 | title="Average Order Value by Cohort Month" 152 | /> 153 | 154 | ### Average Order Values 155 | 156 | <ScatterPlot 157 | data={customers_with_cohort} 158 | x=cohort_month 159 | y=average_order_value_usd0 160 | xAxisTitle=true 161 | yAxisTitle=true 162 | /> 163 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/stores/[city].md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop {$page.params.city} 🥪 2 | 3 | ```opening 4 | select 5 | location_name, 6 | min(month) as opened_month_mmmyyyy 7 | from ${monthly_stats} 8 | group by location_name 9 | order by opened_month_mmmyyyy desc 10 | ``` 11 | 12 | {#if opening[0].location_name === $page.params.city} 13 | 14 | {$page.params.city} is the most recent store opening for Jaffle Shop, opened in <Value data={opening.filter(d => d.location_name === $page.params.city)} column=opened_month_mmmyyyy />.
15 | 16 | {:else} 17 | 18 | The {$page.params.city} location was opened in <Value data={opening.filter(d => d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 19 | 20 | {/if} 21 | 22 | ```monthly_stats 23 | with 24 | monthly_stats as ( 25 | select 26 | date_trunc('month', ordered_at) as month, 27 | location_name, 28 | sum(order_total) as revenue_usd1k, 29 | count(*)::float as orders, 30 | count(distinct customer_id)::float as customers 31 | 32 | from analytics.orders 33 | group by month, location_name 34 | order by month desc 35 | ) 36 | 37 | select 38 | *, 39 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 40 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 41 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 42 | monthname(month) as month_name 43 | from monthly_stats 44 | ``` 45 | 46 | <BigValue 47 | data={monthly_stats.filter(data => data.location_name === $page.params.city)} 48 | value=revenue_usd1k 49 | comparison=revenue_growth_pct1 50 | title="Monthly Revenue" 51 | comparisonTitle="vs. prev. month" 52 | /> 53 | 54 | <BigValue 55 | data={monthly_stats.filter(data => data.location_name === $page.params.city)} 56 | value=orders 57 | comparison=order_growth_pct1 58 | title="Monthly Orders" 59 | comparisonTitle="vs. prev. month" 60 | /> 61 | 62 | Jaffle Shop {$page.params.city} served <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=customers/> happy customers in <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=month_name/>. This was a change of <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=customer_growth_pct1/> from <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=month_name row=1/>. 63 | 64 | ```orders_per_week 65 | select 66 | location_name as city, 67 | date_trunc('week', ordered_at) as week, 68 | count(*) as orders 69 | 70 | from analytics.orders 71 | 72 | group by 1, 2 73 | order by 1, 2 74 | ``` 75 | 76 | ## Orders Per Week in {$page.params.city} 77 | 78 | <LineChart 79 | data={orders_per_week.filter(data => data.city === $page.params.city)} 80 | x=week 81 | y=orders 82 | yAxisTitle="orders per week in {$page.params.city}" 83 | /> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/stores/index.md: -------------------------------------------------------------------------------- 1 | # Stores 2 | 3 | ```revenue_per_city 4 | select 5 | location_name as city, 6 | concat('/stores/', location_name) as store_link, 7 | count(distinct customer_id) as customers, 8 | count(*) as orders, 9 | sum(order_total) as revenue_usd 10 | 11 | from analytics.orders 12 | 13 | group by 1, 2 14 | ``` 15 | 16 | Click a row to see the report for that store: 17 | <DataTable data={revenue_per_city} link=store_link/> -------------------------------------------------------------------------------- /test_projects/revenue/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-postgres==1.6.0-b8 2 | jafgen~=0.3.1 3 | pre-commit~=3.0.4 4 | sqlfluff-templater-dbt~=2.0.0a5 5 | sqlfluff~=2.0.0a5 -------------------------------------------------------------------------------- /test_projects/revenue/seeds/__seeds.yml: -------------------------------------------------------------------------------- 1 | seeds: 2 | - name: integers 3 | config: 4 | # `access` is set under `config` here to support dbt-core 1.6.x. Note that 5 | # the latest 1.7.x releases do not allow setting it both here and at the top level.
      access: public

  - name: seed_accounts
    config:
      access: private

--------------------------------------------------------------------------------
/test_projects/revenue/seeds/integers.csv:
--------------------------------------------------------------------------------

id
1
2
3
4

--------------------------------------------------------------------------------
/test_projects/revenue/seeds/seed_accounts.csv:
--------------------------------------------------------------------------------

name
foo
bar
baz

--------------------------------------------------------------------------------
/test_projects/revenue/snapshots/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/snapshots/.gitkeep

--------------------------------------------------------------------------------
/test_projects/revenue/tests/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/tests/.gitkeep

--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------

def test_dbt_loom_injects_model():
    """Test if a dbt-loom model is injected into a dbt context."""
    pass

--------------------------------------------------------------------------------
/tests/test_dbt_core_execution.py:
--------------------------------------------------------------------------------

import os
from pathlib import Path

import dbt
import dbt.exceptions
from dbt.cli.main import dbtRunner, dbtRunnerResult

starting_path = os.getcwd()


def test_dbt_core_runs_loom_plugin():
    """Verify that dbt-core runs the dbt-loom plugin and nodes are injected."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    runner.invoke(["compile"])

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    # Make sure nothing failed
    assert output.exception is None

    output: dbtRunnerResult = runner.invoke(["ls"])

    # Make sure nothing failed
    assert output.exception is None

    # Check for injection
    assert isinstance(output.result, list)

    # Check that the versioned models work.
    subset = {
        "revenue.orders.v1",
        "revenue.orders.v2",
    }

    # Excluded packages do not get injected and loaded into a manifest.
    assert not any(["dbt_project_evaluator" in item for item in output.result])

    os.chdir(starting_path)

    assert set(output.result).issuperset(
        subset
    ), "The child project is missing expected nodes. Check that injection still works."


def test_dbt_loom_injects_dependencies():
    """Verify that dbt-core runs the dbt-loom plugin and that it flags access violations."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output = runner.invoke(["compile"])

    assert output.exception is None, output.exception.get_message()  # type: ignore

    # Write a model that refs a protected node in the upstream revenue project.
    path = Path(
        f"{starting_path}/test_projects/customer_success/models/staging/stg_orders_enhanced.sql"
    )

    with open(path, "w") as file:
        file.write(
            """
            with
            upstream as (
                select * from {{ ref('revenue', 'stg_orders') }}
            )

            select * from upstream
            """
        )

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    path.unlink()

    os.chdir(starting_path)

    # Make sure the cross-project reference was rejected.
    assert isinstance(output.exception, dbt.exceptions.DbtReferenceError)


def test_dbt_loom_injects_groups():
    """Verify that dbt-core runs the dbt-loom plugin and that it flags group violations."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output = runner.invoke(["compile"])

    assert output.exception is None

    # Write a model that refs a private, grouped node in the upstream revenue project.
    path = Path(
        f"{starting_path}/test_projects/customer_success/models/marts/marketing_lists.sql"
    )

    with open(path, "w") as file:
        file.write(
            """
            with
            upstream as (
                select * from {{ ref('accounts') }}
            )

            select * from upstream
            """
        )

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    path.unlink()

    os.chdir(starting_path)

    # Make sure the group violation was rejected.
    assert isinstance(output.exception, dbt.exceptions.DbtReferenceError)


def test_dbt_core_telemetry_blocking():
    """Verify that dbt-loom prevents telemetry about itself from being sent."""
    import shutil

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    shutil.rmtree("logs")
    runner.invoke(["compile"])

    # Check that no plugin events were sent. This is important to verify that
    # telemetry blocking is working.
161 | with open("logs/dbt.log") as log_file: 162 | assert "plugin_get_nodes" not in log_file.read() 163 | 164 | os.chdir(starting_path) 165 | -------------------------------------------------------------------------------- /tests/test_mainfest_node.py: -------------------------------------------------------------------------------- 1 | from dbt_loom.manifests import ManifestNode 2 | 3 | 4 | try: 5 | from dbt.artifacts.resources.types import NodeType 6 | except ModuleNotFoundError: 7 | from dbt.node_types import NodeType # type: ignore 8 | 9 | 10 | def test_rewrite_resource_types(): 11 | """Confirm that resource types are rewritten if they are incorrect due to previous injections.""" 12 | 13 | node = { 14 | "unique_id": "seed.example.foo", 15 | "name": "foo", 16 | "package_name": "example", 17 | "schema": "bar", 18 | "resource_type": "model", 19 | } 20 | 21 | manifest_node = ManifestNode(**(node)) # type: ignore 22 | 23 | assert manifest_node.resource_type == NodeType.Seed 24 | -------------------------------------------------------------------------------- /tests/test_manifest_loaders.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from typing import Dict, Generator, Tuple 5 | from urllib.parse import urlparse 6 | 7 | import pytest 8 | from dbt_loom.config import ( 9 | FileReferenceConfig, 10 | ManifestReference, 11 | ManifestReferenceType, 12 | LoomConfigurationError, 13 | ) 14 | from dbt_loom.manifests import ManifestLoader, UnknownManifestPathType 15 | 16 | 17 | @pytest.fixture 18 | def example_file() -> Generator[Tuple[Path, Dict], None, None]: 19 | example_content = {"foo": "bar"} 20 | path = Path("example.json") 21 | with open(path, "w") as file: 22 | json.dump(example_content, file) 23 | yield path, example_content 24 | path.unlink() 25 | 26 | 27 | def test_load_from_local_filesystem_pass(example_file): 28 | """Test that ManifestLoader can load a local JSON file.""" 29 | 30 | path, example_content = example_file 31 | 32 | file_config = FileReferenceConfig( 33 | path=urlparse("file://" + str(Path(path).absolute())) 34 | ) 35 | 36 | output = ManifestLoader.load_from_local_filesystem(file_config) 37 | 38 | assert output == example_content 39 | 40 | 41 | def test_load_from_local_filesystem_local_path(example_file): 42 | """Test that ManifestLoader can load a local JSON file.""" 43 | 44 | path, example_content = example_file 45 | 46 | file_config = FileReferenceConfig(path=str(path)) # type: ignore 47 | 48 | output = ManifestLoader.load_from_local_filesystem(file_config) 49 | 50 | assert output == example_content 51 | 52 | 53 | def test_load_from_path_fails_invalid_scheme(example_file): 54 | """ 55 | est that ManifestLoader will raise the appropriate exception if an invalid 56 | scheme is applied. 
57 | """ 58 | 59 | file_config = FileReferenceConfig( 60 | path=urlparse("ftp://example.com/example.json"), 61 | ) # type: ignore 62 | 63 | with pytest.raises(UnknownManifestPathType): 64 | ManifestLoader.load_from_path(file_config) 65 | 66 | 67 | def test_load_from_remote_pass(example_file): 68 | """Test that ManifestLoader can load a remote JSON file via HTTP(S).""" 69 | 70 | _, example_content = example_file 71 | 72 | file_config = FileReferenceConfig( 73 | path=urlparse( 74 | "https://s3.us-east-2.amazonaws.com/com.nicholasyager.dbt-loom/example.json" 75 | ), 76 | ) 77 | 78 | output = ManifestLoader.load_from_http(file_config) 79 | 80 | assert output == example_content 81 | 82 | 83 | def test_manifest_loader_selection(example_file): 84 | """Confirm scheme parsing works for picking the manifest loader.""" 85 | _, example_content = example_file 86 | manifest_loader = ManifestLoader() 87 | 88 | file_config = FileReferenceConfig( 89 | path=urlparse( 90 | "https://s3.us-east-2.amazonaws.com/com.nicholasyager.dbt-loom/example.json" 91 | ), 92 | ) 93 | 94 | manifest_reference = ManifestReference( 95 | name="example", type=ManifestReferenceType.file, config=file_config 96 | ) 97 | 98 | manifest = manifest_loader.load(manifest_reference) 99 | 100 | assert manifest == example_content 101 | 102 | 103 | def test_load_from_local_filesystem_optional_missing(): 104 | """If the manifest file does not exist, it should not raise an error if optional=True.""" 105 | file_config = FileReferenceConfig( 106 | path="not_exist_manifest.json" 107 | ) 108 | manifest_reference = ManifestReference( 109 | name="missing", 110 | type=ManifestReferenceType.file, 111 | config=file_config, 112 | optional=True, 113 | ) 114 | manifest_loader = ManifestLoader() 115 | manifest = manifest_loader.load(manifest_reference) 116 | assert manifest is None 117 | 118 | 119 | def test_load_from_local_filesystem_not_optional_missing(): 120 | """If the manifest file does not exist, it should raise an error if optional=False.""" 121 | file_config = FileReferenceConfig( 122 | path="not_exist_manifest.json" 123 | ) 124 | manifest_reference = ManifestReference( 125 | name="missing", 126 | type=ManifestReferenceType.file, 127 | config=file_config, 128 | optional=False, 129 | ) 130 | manifest_loader = ManifestLoader() 131 | with pytest.raises(LoomConfigurationError): 132 | manifest_loader.load(manifest_reference) 133 | --------------------------------------------------------------------------------