├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── publish-docs-main.yml │ ├── publish-docs-release.yml │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── dbt_loom ├── __init__.py ├── clients │ ├── az_blob.py │ ├── dbt_cloud.py │ ├── gcs.py │ ├── s3.py │ └── snowflake_stage.py ├── config.py ├── logging.py ├── manifests.py └── shims.py ├── docs ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── advanced-configuration.md ├── getting-started.md └── index.md ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── test_projects ├── customer_success │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── .sqlfluff │ ├── .sqlfluffignore │ ├── README.md │ ├── Taskfile.yml │ ├── analyses │ │ └── .gitkeep │ ├── dbt_loom.config.yml │ ├── dbt_project.yml │ ├── jaffle-data │ │ └── raw_customers.csv │ ├── macros │ │ ├── .gitkeep │ │ └── cents_to_dollars.sql │ ├── meltano.yml │ ├── models │ │ ├── marts │ │ │ ├── __models.yml │ │ │ ├── customer_status_histories.py │ │ │ └── customers.sql │ │ └── staging │ │ │ ├── __models.yml │ │ │ ├── __sources.yml │ │ │ └── stg_customers.sql │ ├── package-lock.yml │ ├── packages.yml │ ├── profiles.yml │ ├── reports │ │ ├── .evidence │ │ │ └── customization │ │ │ │ └── custom-formatting.json │ │ ├── .gitignore │ │ ├── README.md │ │ ├── package-lock.json │ │ ├── package.json │ │ └── pages │ │ │ ├── analysis │ │ │ └── seasonality-investigation.md │ │ │ ├── customers │ │ │ ├── [customer].md │ │ │ └── index.md │ │ │ ├── index.md │ │ │ └── stores │ │ │ ├── [city].md │ │ │ └── index.md │ ├── requirements.txt │ ├── snapshots │ │ └── .gitkeep │ └── tests │ │ └── .gitkeep └── revenue │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── .sqlfluff │ ├── .sqlfluffignore │ ├── README.md │ ├── Taskfile.yml │ ├── analyses │ └── .gitkeep │ ├── dbt_loom.config.yml │ ├── dbt_project.yml │ ├── jaffle-data │ ├── raw_items.csv │ ├── raw_orders.csv │ ├── raw_products.csv │ ├── raw_stores.csv │ └── raw_supplies.csv │ ├── macros │ ├── .gitkeep │ └── cents_to_dollars.sql │ ├── meltano.yml │ ├── models │ ├── groups.yml │ ├── marts │ │ ├── __models.yml │ │ ├── accounts.sql │ │ ├── orders_v1.sql │ │ └── orders_v2.sql │ └── staging │ │ ├── __models.yml │ │ ├── __sources.yml │ │ ├── stg_accounts.sql │ │ ├── stg_locations.sql │ │ ├── stg_order_items.sql │ │ ├── stg_orders.sql │ │ ├── stg_products.sql │ │ └── stg_supplies.sql │ ├── package-lock.yml │ ├── packages.yml │ ├── profiles.yml │ ├── reports │ ├── .evidence │ │ └── customization │ │ │ └── custom-formatting.json │ ├── .gitignore │ ├── README.md │ ├── package-lock.json │ ├── package.json │ └── pages │ │ ├── analysis │ │ └── seasonality-investigation.md │ │ ├── customers │ │ ├── [customer].md │ │ └── index.md │ │ ├── index.md │ │ └── stores │ │ ├── [city].md │ │ └── index.md │ ├── requirements.txt │ ├── seeds │ ├── __seeds.yml │ ├── integers.csv │ └── seed_accounts.csv │ ├── snapshots │ └── .gitkeep │ └── tests │ └── .gitkeep └── tests ├── __init__.py ├── test_dbt_core_execution.py ├── test_mainfest_node.py └── test_manifest_loaders.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: nicholasyager # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug, triage 6 | assignees: nicholasyager 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. dbt state 16 | 2. Run the <> command 17 | 3. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | - OS: [e.g. MacOS 14.2.1] 26 | - dbt-loom Version [e.g. 0.4.0] 27 | - dbt-core Version [e.g. 1.7.10] 28 | 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: nicholasyager 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs-main.yml: -------------------------------------------------------------------------------- 1 | name: Publish MkDocs on Main Branch 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.9 21 | 22 | - name: Install dependencies 23 | run: | 24 | python3 -m pip install --upgrade pip 25 | pip install poetry 26 | poetry install --with=docs 27 | 28 | - name: Deploy to GitHub Pages 29 | env: 30 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 31 | run: | 32 | git config --global user.name "GitHub Actions Bot" 33 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 34 | poetry run mike deploy --push --message "Deployed by GitHub Actions" main 35 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs-release.yml: -------------------------------------------------------------------------------- 1 | name: Publish MkDocs on Release 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.9 21 | 22 | - name: Install dependencies 23 | run: | 24 | python3 -m pip install --upgrade pip 25 | pip install poetry 26 | poetry install --with=docs 27 | 28 | - name: Extract Major and Minor 
Version 29 | run: | 30 | VERSION_TAG=${{ github.event.release.tag_name }} 31 | VERSION_TAG="${VERSION_TAG#v}" # Remove 'v' prefix if present 32 | MAJOR="${VERSION_TAG%%.*}" 33 | MINOR="${VERSION_TAG#*.}" 34 | MINOR="${MINOR%%.*}" 35 | MAJOR_MINOR_VERSION="${MAJOR}.${MINOR}" 36 | echo "MAJOR_MINOR_VERSION=${MAJOR_MINOR_VERSION}" >> $GITHUB_ENV 37 | 38 | - name: Deploy Updated Docs 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | run: | 42 | git config user.name "GitHub Actions" 43 | git config user.email "actions@github.com" 44 | poetry run mike deploy --push --message "Deploy docs for release ${{ github.event.release.tag_name }}" --update-alias $MAJOR_MINOR_VERSION latest 45 | poetry run mike set-default --push latest 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: PyPi Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release: 9 | environment: release 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | python-version: [3.11] 14 | poetry-version: [1.4.2] 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install poetry ${{ matrix.poetry-version }} 27 | run: | 28 | python -m ensurepip 29 | python -m pip install --upgrade pip 30 | python -m pip install poetry==${{ matrix.poetry-version }} 31 | 32 | - name: Install dependencies 33 | shell: bash 34 | run: python -m poetry install 35 | 36 | - name: Build 37 | run: | 38 | python -m poetry build 39 | 40 | - name: Publish 41 | env: 42 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }} 43 | run: | 44 | python -m poetry publish --skip-existing -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests on pull requests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test: 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | python-version: [3.11] 14 | poetry-version: [1.4.2] 15 | dbt-version: [1.6.0, 1.7.0, 1.8.0, 1.9.0b2] 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Install poetry ${{ matrix.poetry-version }} 28 | run: | 29 | python -m ensurepip 30 | python -m pip install --upgrade pip 31 | python -m pip install poetry==${{ matrix.poetry-version }} 32 | 33 | - name: Install dependencies 34 | shell: bash 35 | run: python -m poetry install --with=dev 36 | 37 | - name: Install dbt-core 38 | shell: bash 39 | run: python -m poetry add dbt-core~=${{ matrix.dbt-version }} --allow-prereleases 40 | 41 | - name: Test 42 | run: | 43 | python -m poetry run pytest 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | venv/ 3 | venv-*/ 4 | dist/ 5 | 6 | __pycache__/ 7 | .mypy_cache/ 8 | .pytest_cache/ 9 | 10 | .idea/ 11 | .vscode/ 12 | 13 | */target/ 14 | */dbt_packages/ 15 | */logs/ 16 | logs/ 17 | *.duckdb 18 | *.duckdb.wal 19 
| *.user.yml 20 | *.db 21 | 22 | reports/sources/*.csv 23 | 24 | .meltano 25 | .DS_Store 26 | .ruff_cache -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [push] 2 | 3 | default_language_version: 4 | python: python3.11 5 | 6 | repos: 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | # Ruff version. 9 | rev: v0.0.278 10 | hooks: 11 | - id: ruff 12 | args: [--fix, --exit-non-zero-on-fix] 13 | 14 | - repo: local 15 | hooks: 16 | - id: ruff 17 | stages: [commit, push] 18 | name: ruff_format 19 | entry: poetry run ruff format 20 | language: system 21 | types: [python] 22 | - id: mypy 23 | stages: [commit, push] 24 | name: mypy 25 | entry: poetry run mypy --ignore-missing-imports 26 | language: system 27 | types: [python] 28 | 29 | - repo: https://github.com/pre-commit/pre-commit-hooks 30 | rev: v2.1.0 31 | hooks: 32 | - id: trailing-whitespace 33 | stages: [commit, push] 34 | - id: check-added-large-files 35 | - id: check-ast 36 | stages: [commit, push] 37 | - id: check-case-conflict 38 | - id: check-byte-order-marker 39 | - id: check-executables-have-shebangs 40 | - id: check-docstring-first 41 | stages: [commit, push] 42 | - id: check-json 43 | - id: check-merge-conflict 44 | stages: [commit, push] 45 | - id: check-symlinks 46 | - id: check-vcs-permalinks 47 | - id: check-xml 48 | - id: check-yaml 49 | - id: debug-statements 50 | - id: detect-private-key 51 | # - id: flake8 52 | # stages: [commit,push] 53 | - id: forbid-new-submodules 54 | - id: no-commit-to-branch 55 | stages: [commit, push] 56 | args: 57 | - --branch=main 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # dbt-loom
2 |
3 | [![pypi version shield](https://img.shields.io/pypi/v/dbt-loom)](https://img.shields.io/pypi/v/dbt-loom)
4 |
5 | dbt-loom is a dbt Core plugin that weaves together multi-project deployments. dbt-loom works by fetching public model definitions from your dbt artifacts, and injecting those models into your dbt project.
6 |
7 | ```mermaid
8 | flowchart LR
9 |
10 | classDef black fill:#f2f2ebff, stroke:#000, color:#000
11 | classDef background fill:#f2f2ebff, stroke:#000, color:#000
12 | classDef hidden fill:#BADC3F, stroke:#BADC3F, color:#BADC3F
13 |
14 | style TOP fill:#BADC3F, stroke:#000
15 |
16 | subgraph TOP[Your Infrastructure]
17 | direction TB
18 | dbt_runtime[dbt Core]:::background
19 | proprietary_plugin[Open Source Metadata Plugin]:::background
20 |
21 | files[Local and Remote Files]:::background
22 | object_storage[Object Storage]:::background
23 | discovery_api[dbt Cloud APIs]:::background
24 |
25 | discovery_api --> proprietary_plugin
26 | files --> proprietary_plugin
27 | object_storage --> proprietary_plugin
28 | proprietary_plugin --> dbt_runtime
29 | end
30 |
31 | Project:::black --> TOP --> Warehouse:::black
32 | ```
33 |
34 | dbt-loom currently supports obtaining model definitions from:
35 |
36 | - Local manifest files
37 | - Remote manifest files via http(s)
38 | - dbt Cloud
39 | - GCS
40 | - S3-compatible object storage services
41 | - Azure Storage
42 |
43 | ## Getting Started
44 |
45 | To begin, install the `dbt-loom` python package.
46 |
47 | ```console
48 | pip install dbt-loom
49 | ```
50 |
51 | Next, create a `dbt-loom` configuration file. This configuration file provides the paths for your
52 | upstream projects' manifest files.
53 |
54 | ```yaml
55 | manifests:
56 |   - name: project_name # This should match the project's real name
57 |     type: file
58 |     config:
59 |       # A path to your manifest. This can be either a local path, or a remote
60 |       # path accessible via http(s).
61 |       path: path/to/manifest.json
62 | ```
63 |
64 | By default, `dbt-loom` will look for `dbt_loom.config.yml` in your working directory. You can also set the
65 | `DBT_LOOM_CONFIG` environment variable.
66 |
67 | ## How does it work?
68 |
69 | As of dbt-core 1.6.0-b8, there exists a `dbtPlugin` class which defines functions that can
70 | be called by dbt-core's `PluginManager`. During different parts of the dbt-core lifecycle (such as graph linking and
71 | manifest writing), the `PluginManager` will be called and all plugins registered with the appropriate hook will be executed.
72 |
73 | dbt-loom implements a `get_nodes` hook, and uses a configuration file to parse manifests, identify public models, and
74 | inject those public models when called by `dbt-core`.
75 |
76 | ## Advanced Features
77 |
78 | ### Loading artifacts from remote sources
79 |
80 | `dbt-loom` supports automatically fetching manifest artifacts from a variety
81 | of remote sources.
82 |
83 | #### Using dbt Cloud as an artifact source
84 |
85 | You can use dbt-loom to fetch model definitions from dbt Cloud by setting up a `dbt_cloud` manifest in your `dbt-loom` config, and setting the `DBT_CLOUD_API_TOKEN` environment variable in your execution environment.
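For example, the token can be exported in your shell before invoking dbt. A minimal sketch; the token value shown here is a placeholder:

```console
export DBT_CLOUD_API_TOKEN="<your dbt Cloud service token>"
dbt build
```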
86 |
87 | ```yaml
88 | manifests:
89 |   - name: project_name
90 |     type: dbt_cloud
91 |     config:
92 |       account_id: <account_id>
93 |
94 |       # Job ID pertains to the job that you'd like to fetch artifacts from.
95 |       job_id: <job_id>
96 |
97 |       api_endpoint: <api_endpoint>
98 |       # dbt Cloud has multiple regions with different URLs. Update this to
99 |       # your appropriate dbt cloud endpoint.
100 |
101 |       step: <step>
102 |       # If your job generates multiple artifacts, you can set the step from
103 |       # which to fetch artifacts. Defaults to the last step.
104 | ```
105 |
106 | #### Using an S3-compatible object store as an artifact source
107 |
108 | You can use dbt-loom to fetch manifest files from S3-compatible object stores
109 | by setting up an `s3` manifest in your `dbt-loom` config. Note that this
110 | approach supports all standard boto3-compatible environment variables and authentication mechanisms; see the [boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#environment-variables) for more details.
111 |
112 | ```yaml
113 | manifests:
114 |   - name: project_name
115 |     type: s3
116 |     config:
117 |       bucket_name: <bucket_name>
118 |       # The name of the bucket where your manifest is stored.
119 |
120 |       object_name: <object_name>
121 |       # The object name of your manifest file.
122 | ```
123 |
124 | #### Using GCS as an artifact source
125 |
126 | You can use dbt-loom to fetch manifest files from Google Cloud Storage by setting up a `gcs` manifest in your `dbt-loom` config.
127 |
128 | ```yaml
129 | manifests:
130 |   - name: project_name
131 |     type: gcs
132 |     config:
133 |       project_id: <project_id>
134 |       # The alphanumeric ID of the GCP project that contains your target bucket.
135 |
136 |       bucket_name: <bucket_name>
137 |       # The name of the bucket where your manifest is stored.
138 |
139 |       object_name: <object_name>
140 |       # The object name of your manifest file.
141 |
142 |       credentials: <path/to/credentials.json>
143 |       # A path to the service account JSON to use. If not passed, falls back to the default credentials inferred from the environment.
144 | ```
145 |
146 | #### Using Azure Storage as an artifact source
147 |
148 | You can use dbt-loom to fetch manifest files from Azure Storage
149 | by setting up an `azure` manifest in your `dbt-loom` config. The `azure` type implements
150 | the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python)
151 | class, supporting all environment variables and authentication mechanisms.
152 | Alternatively, set the `AZURE_STORAGE_CONNECTION_STRING` environment variable to
153 | authenticate via a connection string.
154 |
155 | ```yaml
156 | manifests:
157 |   - name: project_name
158 |     type: azure
159 |     config:
160 |       account_name: <account_name> # The name of your Azure Storage account
161 |       container_name: <container_name> # The name of your Azure Storage container
162 |       object_name: <object_name> # The object name of your manifest file.
163 | ```
164 |
165 | #### Using Snowflake Stage as an artifact source
166 |
167 | You can use dbt-loom to fetch manifest files from a Snowflake stage by setting up a `snowflake` manifest in your `dbt-loom` config. Please note that this only
168 | works for dbt-core versions 1.8.0 and newer.
169 |
170 | ```yaml
171 | manifests:
172 |   - name: project_name
173 |     type: snowflake
174 |     config:
175 |       stage: stage_name # Stage name, can include Database/Schema
176 |       stage_path: path/to/dbt/manifest.json # Path to manifest file in the stage
177 | ```
178 |
179 | ### Using environment variables
180 |
181 | You can easily incorporate your own environment variables into the config file.
This allows for dynamic configuration values that can change based on the environment. To specify an environment variable in the `dbt-loom` config file, use one of the following formats:
182 |
183 | `${ENV_VAR}` or `$ENV_VAR`
184 |
185 | #### Example:
186 |
187 | ```yaml
188 | manifests:
189 |   - name: revenue
190 |     type: gcs
191 |     config:
192 |       project_id: ${GCP_PROJECT}
193 |       bucket_name: ${GCP_BUCKET}
194 |       object_name: ${MANIFEST_PATH}
195 | ```
196 |
197 | ### Gzipped files
198 |
199 | `dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed
200 | with `.gz`.
201 |
202 | ```yaml
203 | manifests:
204 |   - name: revenue
205 |     type: s3
206 |     config:
207 |       bucket_name: example_bucket_name
208 |       object_name: manifest.json.gz
209 | ```
210 |
211 | ### Exclude nested packages
212 |
213 | In some circumstances, like running `dbt-project-evaluator`, you may not want a
214 | given package in an upstream project to be imported into a downstream project.
215 | You can exclude a package's assets from injection by adding the package name to
216 | the `excluded_packages` list in the downstream project's dbt-loom config.
217 |
218 | ```yaml
219 | manifests:
220 |   - name: revenue
221 |     type: file
222 |     config:
223 |       path: ../revenue/target/manifest.json
224 |     excluded_packages:
225 |       # Provide the string name of the package to exclude during injection.
226 |       - dbt_project_evaluator
227 | ```
228 |
229 | ### Optional manifests
230 |
231 | If you want to allow a manifest reference to be missing (e.g. when the upstream manifest has not been generated yet), you can set `optional: true` for that manifest entry. When `optional` is true and the manifest file does not exist, dbt-loom will skip loading it without raising an error. If `optional` is false or omitted (the default), missing manifests will cause an error.
232 |
233 | ```yaml
234 | manifests:
235 |   - name: revenue
236 |     type: file
237 |     config:
238 |       path: ../revenue/target/manifest.json
239 |     optional: true # If the manifest file is missing, do not raise an error
240 | ```
241 |
242 | ## Known Caveats
243 |
244 | Cross-project dependencies are a relatively new development, and dbt-core plugins
245 | are still in beta. As such, there are a number of caveats to be aware of when using
246 | this tool.
247 |
248 | 1. dbt plugins are only supported in dbt-core version 1.6.0-b8 and newer. This means you must be using a dbt adapter
249 |    compatible with this version.
250 | 2. `PluginNodeArgs` are not fully-realized dbt `ManifestNode`s, so documentation generated by `dbt docs generate` may
251 |    be sparse when viewing injected models.
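As a closing illustration, a downstream model references an injected public model like any other cross-project `ref`. A minimal sketch, assuming a hypothetical upstream project named `revenue` that exposes a public `orders` model:

```sql
-- models/staging/stg_downstream_orders.sql
-- `revenue` is the upstream project's name; `orders` is one of its public models.
select * from {{ ref('revenue', 'orders') }}
```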
252 | -------------------------------------------------------------------------------- /dbt_loom/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import os 3 | import re 4 | from pathlib import Path 5 | from typing import Callable, Dict, Optional, Set 6 | 7 | import yaml 8 | from dbt.contracts.graph.node_args import ModelNodeArgs 9 | from dbt.contracts.graph.nodes import ModelNode 10 | 11 | from dbt.plugins.manager import dbt_hook, dbtPlugin 12 | from dbt.plugins.manifest import PluginNodes 13 | from dbt.config.project import VarProvider 14 | 15 | from dbt_loom.shims import is_invalid_private_ref, is_invalid_protected_ref 16 | 17 | try: 18 | from dbt.artifacts.resources.types import NodeType 19 | except ModuleNotFoundError: 20 | from dbt.node_types import NodeType # type: ignore 21 | 22 | 23 | from dbt_loom.config import dbtLoomConfig 24 | from dbt_loom.logging import fire_event 25 | from dbt_loom.manifests import ManifestLoader, ManifestNode 26 | 27 | import importlib.metadata 28 | 29 | 30 | @dataclass 31 | class LoomModelNodeArgs(ModelNodeArgs): 32 | """A dbt-loom extension of ModelNodeArgs to preserve resource types across lineages.""" 33 | 34 | resource_type: NodeType = NodeType.Model 35 | group: Optional[str] = None 36 | 37 | def __init__(self, **kwargs): 38 | super().__init__( 39 | **{ 40 | key: value 41 | for key, value in kwargs.items() 42 | if key not in ("resource_type", "group") 43 | } 44 | ) 45 | self.resource_type = kwargs.get("resource_type", NodeType.Model) 46 | self.group = kwargs.get("group") 47 | 48 | @property 49 | def unique_id(self) -> str: 50 | unique_id = f"{self.resource_type}.{self.package_name}.{self.name}" 51 | if self.version: 52 | unique_id = f"{unique_id}.v{self.version}" 53 | 54 | return unique_id 55 | 56 | 57 | def identify_node_subgraph(manifest) -> Dict[str, ManifestNode]: 58 | """ 59 | Identify all nodes that should be selected from the manifest, and return ManifestNodes. 60 | """ 61 | 62 | output = {} 63 | 64 | # We're going to temporarily allow all nodes here. 65 | for unique_id in manifest["nodes"].keys(): 66 | if unique_id.split(".")[0] in (NodeType.Test.value, NodeType.Macro.value): 67 | continue 68 | 69 | node = manifest.get("nodes", {}).get(unique_id) 70 | 71 | if not node: 72 | continue 73 | 74 | if node.get("access") is None: 75 | node["access"] = node.get("config", {}).get("access", "protected") 76 | 77 | # Versions may be floats or strings. Standardize on strings for compatibility. 
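        # (For example, a manifest value of `version: 2` becomes `"2"`; keeping
        # both fields as strings avoids type mismatches when comparing versions
        # across projects.)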
78 | for key in ("version", "latest_version"): 79 | if node.get(key): 80 | node[key] = str(node[key]) 81 | 82 | output[unique_id] = ManifestNode(**(node)) 83 | 84 | return output 85 | 86 | 87 | def convert_model_nodes_to_model_node_args( 88 | selected_nodes: Dict[str, ManifestNode], 89 | ) -> Dict[str, LoomModelNodeArgs]: 90 | """Generate a dictionary of ModelNodeArgs based on a dictionary of ModelNodes""" 91 | return { 92 | unique_id: LoomModelNodeArgs( 93 | schema=node.schema_name, 94 | identifier=node.identifier, 95 | **(node.dump()), 96 | ) 97 | for unique_id, node in selected_nodes.items() 98 | if node is not None 99 | } 100 | 101 | 102 | @dataclass 103 | class LoomRunnableConfig: 104 | """A shim class to allow is_invalid_*_ref functions to correctly handle access for loom-injected models.""" 105 | 106 | restrict_access: bool = True 107 | vars: VarProvider = VarProvider(vars={}) 108 | 109 | 110 | class dbtLoom(dbtPlugin): 111 | """ 112 | dbtLoom is a dbt plugin that loads manifest files, parses a DAG from the manifest, 113 | and injects public nodes from imported manifest. 114 | """ 115 | 116 | def __init__(self, project_name: str): 117 | # Log the version of dbt-loom being initialized 118 | fire_event( 119 | msg=f'Initializing dbt-loom={importlib.metadata.version("dbt-loom")}' 120 | ) 121 | 122 | configuration_path = Path( 123 | os.environ.get("DBT_LOOM_CONFIG", "dbt_loom.config.yml") 124 | ) 125 | 126 | self._manifest_loader = ManifestLoader() 127 | self.manifests: Dict[str, Dict] = {} 128 | 129 | self.config: Optional[dbtLoomConfig] = self.read_config(configuration_path) 130 | self.models: Dict[str, LoomModelNodeArgs] = {} 131 | 132 | self._patch_ref_protection() 133 | 134 | if not self.config or (self.config and not self.config.enable_telemetry): 135 | self._patch_plugin_telemetry() 136 | 137 | super().__init__(project_name) 138 | 139 | def _patch_ref_protection(self) -> None: 140 | """Patch out the ref protection functions for proper protections""" 141 | import dbt.contracts.graph.manifest 142 | 143 | fire_event( 144 | msg="dbt-loom: Patching ref protection methods to support dbt-loom dependencies." 145 | ) 146 | 147 | dbt.contracts.graph.manifest.Manifest.is_invalid_protected_ref = ( # type: ignore 148 | self.dependency_wrapper(is_invalid_protected_ref) 149 | ) 150 | dbt.contracts.graph.manifest.Manifest.is_invalid_private_ref = ( # type: ignore 151 | self.dependency_wrapper(is_invalid_private_ref) 152 | ) 153 | 154 | dbt.parser.manifest.ManifestLoader.check_valid_group_config_node = ( # type: ignore 155 | self.group_validation_wrapper( 156 | dbt.parser.manifest.ManifestLoader.check_valid_group_config_node # type: ignore 157 | ) 158 | ) 159 | 160 | dbt.contracts.graph.nodes.ModelNode.from_args = ( # type: ignore 161 | self.model_node_wrapper(dbt.contracts.graph.nodes.ModelNode.from_args) # type: ignore 162 | ) 163 | 164 | def _patch_plugin_telemetry(self) -> None: 165 | """Patch the plugin telemetry function to prevent tracking of dbt plugins.""" 166 | import dbt.tracking 167 | 168 | dbt.tracking.track = self.tracking_wrapper(dbt.tracking.track) 169 | 170 | def tracking_wrapper(self, function) -> Callable: 171 | """Wrap the telemetry `track` function and return early if we're tracking plugin actions.""" 172 | 173 | def outer_function(*args, **kwargs): 174 | """Check the context of the snowplow tracker message for references to loom. 
Return if present.""" 175 | 176 | if any( 177 | [ 178 | self.__class__.__name__ in str(context_item.__dict__) 179 | or "dbt-loom" in str(context_item.__dict__) 180 | or "dbt_loom" in str(context_item.__dict__) 181 | for context_item in kwargs.get("context", []) 182 | ] 183 | ): 184 | return 185 | 186 | return function(*args, **kwargs) 187 | 188 | return outer_function 189 | 190 | def model_node_wrapper(self, function) -> Callable: 191 | """Wrap the ModelNode.from_args function and inject extra properties from the LoomModelNodeArgs.""" 192 | 193 | def outer_function(args: LoomModelNodeArgs) -> ModelNode: 194 | model = function(args) 195 | model.group = args.group 196 | return model 197 | 198 | return outer_function 199 | 200 | def group_validation_wrapper(self, function) -> Callable: 201 | """Wrap the check_valid_group_config_node function to inject upstream group names.""" 202 | 203 | def outer_function( 204 | inner_self, groupable_node, valid_group_names: Set[str] 205 | ) -> bool: 206 | new_groups: Set[str] = { 207 | model.group for model in self.models.values() if model.group is not None 208 | } 209 | 210 | return function( 211 | inner_self, groupable_node, valid_group_names.union(new_groups) 212 | ) 213 | 214 | return outer_function 215 | 216 | def dependency_wrapper(self, function) -> Callable: 217 | def outer_function(inner_self, node, target_model, dependencies) -> bool: 218 | if self.config is not None: 219 | for manifest_name in self.manifests.keys(): 220 | if manifest_name in dependencies: 221 | continue 222 | 223 | dependencies[manifest_name] = LoomRunnableConfig() 224 | 225 | return function(inner_self, node, target_model, dependencies) 226 | 227 | return outer_function 228 | 229 | def get_groups(self) -> Set[str]: 230 | """Get all groups defined in injected models.""" 231 | 232 | return { 233 | model.group for model in self.models.values() if model.group is not None 234 | } 235 | 236 | def read_config(self, path: Path) -> Optional[dbtLoomConfig]: 237 | """Read the dbt-loom configuration file.""" 238 | if not path.exists(): 239 | fire_event( 240 | msg=f"dbt-loom: Config file `{path}` does not exist" 241 | ) 242 | return None 243 | 244 | with open(path) as file: 245 | config_content = file.read() 246 | 247 | config_content = self.replace_env_variables(config_content) 248 | 249 | return dbtLoomConfig(**yaml.load(config_content, yaml.SafeLoader)) 250 | 251 | @staticmethod 252 | def replace_env_variables(config_str: str) -> str: 253 | """Replace environment variable placeholders in the configuration string.""" 254 | pattern = r"\$(\w+)|\$\{([^}]+)\}" 255 | return re.sub( 256 | pattern, 257 | lambda match: os.environ.get( 258 | match.group(1) if match.group(1) is not None else match.group(2), "" 259 | ), 260 | config_str, 261 | ) 262 | 263 | def initialize(self) -> None: 264 | """Initialize the plugin""" 265 | 266 | if self.models != {} or not self.config: 267 | return 268 | 269 | for manifest_reference in self.config.manifests: 270 | fire_event( 271 | msg=f"dbt-loom: Loading manifest for `{manifest_reference.name}`" 272 | f" from `{manifest_reference.type.value}`" 273 | ) 274 | 275 | manifest = self._manifest_loader.load(manifest_reference) 276 | if manifest is None: 277 | continue 278 | 279 | # Find the official project name from the manifest metadata and use that as the manifests key. 
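            # (Falls back to the user-supplied reference name when the manifest
            # metadata does not include a project name.)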
280 |             manifest_name = manifest.get("metadata", {}).get(
281 |                 "project_name", manifest_reference.name
282 |             )
283 |             self.manifests[manifest_name] = manifest
284 |
285 |             selected_nodes = identify_node_subgraph(manifest)
286 |
287 |             # Remove nodes from excluded packages.
288 |             filtered_nodes = {
289 |                 key: value
290 |                 for key, value in selected_nodes.items()
291 |                 if value.package_name not in manifest_reference.excluded_packages
292 |             }
293 |
294 |             loom_nodes = convert_model_nodes_to_model_node_args(filtered_nodes)
295 |
296 |             self.models.update(loom_nodes)
297 |
298 |     @dbt_hook
299 |     def get_nodes(self) -> PluginNodes:
300 |         """
301 |         Inject PluginNodes to dbt for injection into dbt's DAG.
302 |         """
303 |         fire_event(msg="dbt-loom: Injecting nodes")
304 |         return PluginNodes(models=self.models)  # type: ignore
305 |
306 |
307 | plugins = [dbtLoom]
308 |
-------------------------------------------------------------------------------- /dbt_loom/clients/az_blob.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import gzip
4 | from io import BytesIO
5 | from typing import Dict
6 |
7 | from azure.identity import DefaultAzureCredential
8 | from azure.storage.blob import BlobServiceClient
9 | from pydantic import BaseModel
10 |
11 |
12 | class AzureReferenceConfig(BaseModel):
13 |     """Configuration for a reference stored in Azure Storage"""
14 |
15 |     container_name: str
16 |     object_name: str
17 |     account_name: str
18 |
19 |
20 | class AzureClient:
21 |     """A client for loading manifest files from Azure storage."""
22 |
23 |     def __init__(
24 |         self, container_name: str, object_name: str, account_name: str
25 |     ) -> None:
26 |         self.account_name = account_name
27 |         self.container_name = container_name
28 |         self.object_name = object_name
29 |
30 |     def load_manifest(self) -> Dict:
31 |         """Load the manifest.json file from Azure storage."""
32 |
33 |         connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
34 |         try:
35 |             if connection_string:
36 |                 blob_service_client = BlobServiceClient.from_connection_string(
37 |                     connection_string
38 |                 )
39 |             else:
40 |                 account_url = f"https://{self.account_name}.blob.core.windows.net"
41 |                 blob_service_client = BlobServiceClient(
42 |                     account_url, credential=DefaultAzureCredential()
43 |                 )
44 |             blob_client = blob_service_client.get_blob_client(
45 |                 container=self.container_name, blob=self.object_name
46 |             )
47 |         except Exception as e:
48 |             raise Exception(
49 |                 "Unable to connect to Azure. Please confirm your credentials, connection details, and network."
50 |             ) from e
51 |
52 |         # Deserialize the body of the object.
53 |         try:
54 |             if self.object_name.endswith('.gz'):
55 |                 with gzip.GzipFile(fileobj=BytesIO(blob_client.download_blob().readall())) as gzipfile:
56 |                     content = gzipfile.read().decode('utf-8')
57 |             else:
58 |                 content = blob_client.download_blob(encoding="utf-8").readall()
59 |         except Exception:
60 |             raise Exception(
61 |                 f"Unable to read the data contained in the object `{self.object_name}`."
62 |             )
63 |
64 |         try:
65 |             return json.loads(content)
66 |         except Exception:
67 |             raise Exception(
68 |                 f"The object `{self.object_name}` does not contain valid JSON."
69 | ) 70 | -------------------------------------------------------------------------------- /dbt_loom/clients/dbt_cloud.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Dict, Optional 3 | 4 | from pydantic import BaseModel 5 | import requests 6 | 7 | from dbt_loom.logging import fire_event 8 | 9 | 10 | class DbtCloudReferenceConfig(BaseModel): 11 | """Configuration for a dbt Cloud reference.""" 12 | 13 | account_id: int 14 | job_id: int 15 | api_endpoint: Optional[str] = None 16 | step: Optional[int] = None 17 | 18 | 19 | class DbtCloud: 20 | """API Client for dbt Cloud. Fetches latest manifest for a given dbt job.""" 21 | 22 | def __init__( 23 | self, 24 | account_id: int, 25 | token: Optional[str] = None, 26 | api_endpoint: Optional[str] = None, 27 | ) -> None: 28 | resolved_token = token or os.environ.get("DBT_CLOUD_API_TOKEN") 29 | if resolved_token is None: 30 | raise Exception( 31 | "A DBT Cloud token must be provided to dbt-loom when fetching manifest " 32 | "data from dbt Cloud. Please provide one via the `DBT_CLOUD_API_TOKEN` " 33 | "environment variable." 34 | ) 35 | 36 | self.__token: str = resolved_token 37 | 38 | self.account_id = account_id 39 | self.api_endpoint = api_endpoint or "https://cloud.getdbt.com/api/v2" 40 | 41 | def _query(self, endpoint: str, **kwargs) -> Dict: 42 | """Query the dbt Cloud Administrative API.""" 43 | url = f"{self.api_endpoint}/{endpoint}" 44 | fire_event(msg=f"Querying {url}") 45 | response = requests.get( 46 | url, 47 | headers={ 48 | "authorization": "Bearer " + self.__token, 49 | "content-type": "application/json", 50 | }, 51 | **kwargs, 52 | ) 53 | return response.json() 54 | 55 | def _get_manifest(self, run_id: int, step: Optional[int] = None) -> Dict[str, Any]: 56 | """Get the manifest json for a given dbt Cloud run.""" 57 | params = {} 58 | if step: 59 | params["step"] = step 60 | 61 | return self._query( 62 | f"accounts/{self.account_id}/runs/{run_id}/artifacts/manifest.json", 63 | params=params, 64 | ) 65 | 66 | def _get_latest_run(self, job_id: int) -> Dict[str, Any]: 67 | """Get the latest run performed by a dbt Cloud job.""" 68 | return self._query( 69 | f"accounts/{self.account_id}/runs/", 70 | params={ 71 | "job_definition_id": job_id, 72 | "status": 10, 73 | "order_by": "-finished_at", 74 | "limit": 1, 75 | }, 76 | )["data"][0] 77 | 78 | def get_models(self, job_id: int, step: Optional[int] = None) -> Dict[str, Any]: 79 | """Get the latest state of all models by Job ID.""" 80 | latest_run = self._get_latest_run(job_id=job_id) 81 | return self._get_manifest(run_id=latest_run["id"], step=step) 82 | -------------------------------------------------------------------------------- /dbt_loom/clients/gcs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import gzip 3 | from io import BytesIO 4 | from pathlib import Path 5 | from typing import Dict, Optional 6 | 7 | from google.cloud import storage 8 | from pydantic import BaseModel 9 | 10 | 11 | class GCSReferenceConfig(BaseModel): 12 | """Configuration for a GCS reference""" 13 | 14 | project_id: str 15 | bucket_name: str 16 | object_name: str 17 | credentials: Optional[Path] = None 18 | 19 | 20 | class GCSClient: 21 | """Client for GCS. 
Fetches manifest for a given bucket."""
22 |
23 |     def __init__(
24 |         self,
25 |         project_id: str,
26 |         bucket_name: str,
27 |         object_name: str,
28 |         credentials: Optional[Path] = None,
29 |     ) -> None:
30 |         self.project_id = project_id
31 |         self.bucket_name = bucket_name
32 |         self.object_name = object_name
33 |         self.credentials = credentials
34 |
35 |     def load_manifest(self) -> Dict:
36 |         """Load a manifest json from a GCS bucket."""
37 |         client = (
38 |             storage.Client.from_service_account_json(
39 |                 self.credentials, project=self.project_id
40 |             )
41 |             if self.credentials
42 |             else storage.Client(project=self.project_id)
43 |         )
44 |         bucket = client.get_bucket(self.bucket_name)
45 |         blob = bucket.get_blob(self.object_name)
46 |         if not blob:
47 |             raise Exception(
48 |                 f"The object `{self.object_name}` does not exist in bucket "
49 |                 f"`{self.bucket_name}`."
50 |             )
51 |
52 |         if self.object_name.endswith('.gz'):
53 |             compressed_manifest = blob.download_as_bytes()
54 |             with gzip.GzipFile(fileobj=BytesIO(compressed_manifest)) as gzip_file:
55 |                 manifest_json = gzip_file.read()
56 |         else:
57 |             manifest_json = blob.download_as_text()
58 |
59 |         try:
60 |             return json.loads(manifest_json)
61 |         except Exception:
62 |             raise Exception(
63 |                 f"The object `{self.object_name}` does not contain valid JSON."
64 |             )
65 |
-------------------------------------------------------------------------------- /dbt_loom/clients/s3.py: --------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Dict, Optional
4 |
5 | import boto3
6 | import gzip
7 | from io import BytesIO
8 | from pydantic import BaseModel
9 |
10 |
11 | class S3ReferenceConfig(BaseModel):
12 |     """Configuration for a reference stored in S3"""
13 |
14 |     bucket_name: str
15 |     object_name: str
16 |     credentials: Optional[Path] = None
17 |
18 |
19 | class S3Client:
20 |     """A client for loading manifest files from S3-compatible object stores."""
21 |
22 |     def __init__(self, bucket_name: str, object_name: str) -> None:
23 |         self.bucket_name = bucket_name
24 |         self.object_name = object_name
25 |
26 |     def load_manifest(self) -> Dict:
27 |         """Load the manifest.json file from an S3 bucket."""
28 |
29 |         client = boto3.client("s3")
30 |
31 |         # TODO: Determine if I need to add args for SSE
32 |         try:
33 |             response = client.get_object(Bucket=self.bucket_name, Key=self.object_name)
34 |         except client.exceptions.NoSuchBucket:
35 |             raise Exception(f"The bucket `{self.bucket_name}` does not exist.")
36 |         except client.exceptions.NoSuchKey:
37 |             raise Exception(
38 |                 f"The object `{self.object_name}` does not exist in bucket "
39 |                 f"`{self.bucket_name}`."
40 |             )
41 |
42 |         # Deserialize the body of the object.
43 |         try:
44 |             if self.object_name.endswith(".gz"):
45 |                 body = response["Body"].read()
46 |                 with gzip.GzipFile(fileobj=BytesIO(body)) as gzipfile:
47 |                     content = gzipfile.read().decode('utf-8')
48 |             else:
49 |                 content = response["Body"].read().decode("utf-8")
50 |         except Exception:
51 |             raise Exception(
52 |                 f"Unable to read the data contained in the object `{self.object_name}`."
53 |             )
54 |
55 |         try:
56 |             return json.loads(content)
57 |         except Exception:
58 |             raise Exception(
59 |                 f"The object `{self.object_name}` does not contain valid JSON."
60 |             )
61 |
-------------------------------------------------------------------------------- /dbt_loom/clients/snowflake_stage.py: --------------------------------------------------------------------------------
1 | import gzip
2 | import json
3 | import tempfile
4 | from pathlib import Path, PurePosixPath
5 | from typing import Dict
6 |
7 | from dbt.config.runtime import load_profile
8 | from dbt.flags import get_flags
9 | from dbt_loom.logging import fire_event
10 | from pydantic import BaseModel
11 |
12 |
13 | class SnowflakeReferenceConfig(BaseModel):
14 |     """Configuration for a reference stored in a Snowflake stage"""
15 |
16 |     stage: str
17 |     stage_path: str
18 |
19 |
20 | class SnowflakeClient:
21 |     """A client for loading manifest files from a Snowflake stage."""
22 |
23 |     def __init__(self, stage: str, stage_path: str) -> None:
24 |         self.stage = stage
25 |         self.stage_path = stage_path.lstrip("/")
26 |
27 |     def load_manifest(self) -> Dict:
28 |         """Load the manifest.json file from a Snowflake stage."""
29 |
30 |         try:
31 |             from dbt.adapters.snowflake import SnowflakeAdapter
32 |         except ImportError as exception:
33 |             fire_event(
34 |                 msg="dbt-loom: Fatal error. Expected to find dbt-snowflake "
35 |                 "installed to support loading the manifest from a Snowflake "
36 |                 "stage.",
37 |             )
38 |             raise exception
39 |
40 |         try:
41 |             from dbt.mp_context import get_mp_context
42 |         except ImportError as exception:
43 |             fire_event(
44 |                 msg="dbt-loom: Fatal error. Unable to initialize a Snowflake "
45 |                 "adapter. Loading from Snowflake stages requires dbt-core "
46 |                 "1.8.0 and newer."
47 |             )
48 |             raise exception
49 |
50 |         flags = get_flags()
51 |         profile = load_profile(
52 |             project_root=flags.PROJECT_DIR,
53 |             cli_vars=flags.VARS,
54 |             profile_name_override=flags.PROFILE,
55 |             target_override=flags.TARGET,
56 |         )
57 |         adapter = SnowflakeAdapter(profile, get_mp_context())
58 |         file_name = str(PurePosixPath(self.stage_path).name)
59 |         tmp_dir = tempfile.mkdtemp(prefix="dbt_loom_")
60 |
61 |         # Snowflake needs '/' path separators
62 |         tmp_dir_sf = tmp_dir.replace("\\", "/")
63 |
64 |         with adapter.connection_named("dbt-loom"):
65 |             get_query = f"get @{self.stage}/{self.stage_path} file://{tmp_dir_sf}/"
66 |             response, _table = adapter.connections.execute(get_query)
67 |             if response.rows_affected == 0:
68 |                 raise Exception(
69 |                     f"Failed to get file {self.stage}/{self.stage_path}: {response}"
70 |                 )
71 |
72 |         download_path = Path(tmp_dir) / file_name
73 |
74 |         if download_path.name.endswith(".gz"):
75 |             with gzip.GzipFile(download_path) as gzip_file:
76 |                 content = gzip_file.read().decode("utf-8")
77 |         else:
78 |             with download_path.open("r") as f:
79 |                 content = f.read()
80 |
81 |         return json.loads(content)
-------------------------------------------------------------------------------- /dbt_loom/config.py: --------------------------------------------------------------------------------
1 | from enum import Enum
2 | from pathlib import Path
3 | import re
4 | from typing import List, Union
5 | from urllib.parse import ParseResult, urlparse
6 |
7 | from pydantic import BaseModel, Field, validator
8 |
9 | from dbt_loom.clients.az_blob import AzureReferenceConfig
10 | from dbt_loom.clients.dbt_cloud import DbtCloudReferenceConfig
11 | from dbt_loom.clients.gcs import GCSReferenceConfig
12 | from dbt_loom.clients.s3 import S3ReferenceConfig
13 | from dbt_loom.clients.snowflake_stage import SnowflakeReferenceConfig
14 |
15 |
16 | class ManifestReferenceType(str, Enum):
17 |     """Type of ManifestReference"""
18 |
19 |     file = "file"
20 |     dbt_cloud = "dbt_cloud"
21 |     gcs = "gcs"
22 |     s3 = "s3"
23 |     azure = "azure"
24 |     snowflake = "snowflake"
25 |
26 |
27 | class FileReferenceConfig(BaseModel):
28 |     """Configuration for a file reference"""
29 |
30 |     path: ParseResult
31 |
32 |     @validator("path", pre=True, always=True)
33 |     def default_path(cls, v, values) -> ParseResult:
34 |         """
35 |         Check if the provided path is a valid URL. If not, convert it into an
36 |         absolute file path.
37 |         """
38 |
39 |         if isinstance(v, ParseResult):
40 |             return v
41 |
42 |         if bool(re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", v)):
43 |             return urlparse(v)
44 |
45 |         return urlparse(Path(v).absolute().as_uri())
46 |
47 |
48 | class ManifestReference(BaseModel):
49 |     """Reference information for a manifest to be loaded into dbt-loom."""
50 |
51 |     name: str
52 |     type: ManifestReferenceType
53 |     config: Union[
54 |         FileReferenceConfig,
55 |         DbtCloudReferenceConfig,
56 |         GCSReferenceConfig,
57 |         S3ReferenceConfig,
58 |         AzureReferenceConfig,
59 |         SnowflakeReferenceConfig,
60 |     ]
61 |     excluded_packages: List[str] = Field(default_factory=list)
62 |     optional: bool = False
63 |
64 |
65 | class dbtLoomConfig(BaseModel):
66 |     """Configuration for dbt Loom"""
67 |
68 |     manifests: List[ManifestReference]
69 |     enable_telemetry: bool = False
70 |
71 |
72 | class LoomConfigurationError(Exception):
73 |     """Error raised when dbt-loom has been misconfigured."""
-------------------------------------------------------------------------------- /dbt_loom/logging.py: --------------------------------------------------------------------------------
1 | try:
2 |     import dbt_common.events.functions as dbt_event_function
3 |     from dbt_common.events.types import Note
4 | except ModuleNotFoundError:
5 |     import dbt.events.functions as dbt_event_function  # type: ignore
6 |     from dbt.events.types import Note  # type: ignore
7 |
8 |
9 | def fire_event(*args, **kwargs) -> None:
10 |     """Fire a dbt-core event."""
11 |     dbt_event_function.fire_event(Note(*args, **kwargs))
-------------------------------------------------------------------------------- /dbt_loom/manifests.py: --------------------------------------------------------------------------------
1 | import datetime
2 | from io import BytesIO
3 | import json
4 | import gzip
5 | import os
6 | from pathlib import Path
7 | from typing import Dict, List, Optional
8 | from urllib.parse import unquote, urlunparse
9 |
10 | from pydantic import BaseModel, Field, validator
11 | import requests
12 |
13 | from dbt_loom.clients.snowflake_stage import SnowflakeReferenceConfig, SnowflakeClient
14 |
15 | try:
16 |     from dbt.artifacts.resources.types import NodeType
17 | except ModuleNotFoundError:
18 |     from dbt.node_types import NodeType  # type: ignore
19 |
20 | from dbt_loom.clients.az_blob import AzureClient, AzureReferenceConfig
21 | from dbt_loom.clients.dbt_cloud import DbtCloud, DbtCloudReferenceConfig
22 | from dbt_loom.clients.gcs import GCSClient, GCSReferenceConfig
23 | from dbt_loom.clients.s3 import S3Client, S3ReferenceConfig
24 | from dbt_loom.config import (
25 |     FileReferenceConfig,
26 |     LoomConfigurationError,
27 |     ManifestReference,
28 |     ManifestReferenceType,
29 | )
30 |
31 |
32 | class DependsOn(BaseModel):
33 |     """Wrapper for storing dependencies"""
34 |
35 |     nodes: List[str] = Field(default_factory=list)
36 |     macros: List[str] = Field(default_factory=list)
37 |
38 |
39 | class ManifestNode(BaseModel, use_enum_values=True):
40 |     """A basic ManifestNode that can be referenced across projects."""
41 |
42 |     name: str
43 |     package_name: str
44 |     unique_id: str
45 |     resource_type: NodeType
46 |     schema_name: str = Field(alias="schema")
47 |     database: Optional[str] = None
48 |     relation_name: Optional[str] = None
49 |     version: Optional[str] = None
50 |     latest_version: Optional[str] = None
51 |     deprecation_date: Optional[datetime.datetime] = None
52 |     access: Optional[str] = "protected"
53 |     group: Optional[str] = None
54 |     generated_at: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
55 |     depends_on: Optional[DependsOn] = None
56 |     depends_on_nodes: List[str] = Field(default_factory=list)
57 |     enabled: bool = True
58 |
59 |     @validator("depends_on_nodes", always=True)
60 |     def default_depends_on_nodes(cls, v, values):
61 |         depends_on = values.get("depends_on")
62 |         if depends_on is None:
63 |             return []
64 |
65 |         return [
66 |             node for node in depends_on.nodes if node.split(".")[0] not in ("source",)
67 |         ]
68 |
69 |     @validator("resource_type", always=True)
70 |     def fix_resource_types(cls, v, values):
71 |         """If the resource type does not match the unique_id prefix, then rewrite the resource type."""
72 |
73 |         node_type = values.get("unique_id").split(".")[0]
74 |         if v != node_type:
75 |             return node_type
76 |         return v
77 |
78 |     @property
79 |     def identifier(self) -> str:
80 |         if not self.relation_name:
81 |             return self.name
82 |
83 |         return self.relation_name.split(".")[-1].replace('"', "").replace("`", "")
84 |
85 |     def dump(self) -> Dict:
86 |         """Dump the ManifestNode to a Dict, with support for pydantic 1 and 2"""
87 |         exclude_set = {"schema_name", "depends_on", "node_config", "unique_id"}
88 |         if hasattr(self, "model_dump"):
89 |             return self.model_dump(exclude=exclude_set)  # type: ignore
90 |
91 |         return self.dict(exclude=exclude_set)
92 |
93 |
94 | class UnknownManifestPathType(Exception):
95 |     """Raised when the ManifestLoader receives a FileReferenceConfig with a path that does not have a known URL scheme."""
96 |
97 |
98 | class InvalidManifestPath(Exception):
99 |     """Raised when the ManifestLoader receives a FileReferenceConfig with an invalid path."""
100 |
101 |
102 | class ManifestLoader:
103 |     def __init__(self):
104 |         self.loading_functions = {
105 |             ManifestReferenceType.file: self.load_from_path,
106 |             ManifestReferenceType.dbt_cloud: self.load_from_dbt_cloud,
107 |             ManifestReferenceType.gcs: self.load_from_gcs,
108 |             ManifestReferenceType.s3: self.load_from_s3,
109 |             ManifestReferenceType.azure: self.load_from_azure,
110 |             ManifestReferenceType.snowflake: self.load_from_snowflake,
111 |         }
112 |
113 |     @staticmethod
114 |     def load_from_path(config: FileReferenceConfig) -> Dict:
115 |         """
116 |         Load a manifest dictionary based on a FileReferenceConfig. This config's
117 |         path can point to either a local file or a URL to a remote location.
118 | """ 119 | 120 | if config.path.scheme in ("http", "https"): 121 | return ManifestLoader.load_from_http(config) 122 | 123 | if config.path.scheme in ("file"): 124 | return ManifestLoader.load_from_local_filesystem(config) 125 | 126 | raise UnknownManifestPathType() 127 | 128 | @staticmethod 129 | def load_from_local_filesystem(config: FileReferenceConfig) -> Dict: 130 | """Load a manifest dictionary from a local file""" 131 | 132 | if not config.path.path: 133 | raise InvalidManifestPath() 134 | 135 | if config.path.netloc: 136 | file_path = Path(f"//{config.path.netloc}{config.path.path}") 137 | else: 138 | file_path = Path( 139 | unquote( 140 | config.path.path.lstrip("/") 141 | if os.name == "nt" 142 | else config.path.path 143 | ) 144 | ) 145 | 146 | if not file_path.exists(): 147 | raise LoomConfigurationError(f"The path `{file_path}` does not exist.") 148 | 149 | if file_path.suffix == ".gz": 150 | with gzip.open(file_path, "rt") as file: 151 | return json.load(file) 152 | 153 | return json.load(open(file_path)) 154 | 155 | @staticmethod 156 | def load_from_http(config: FileReferenceConfig) -> Dict: 157 | """Load a manifest dictionary from a local file""" 158 | 159 | if not config.path.path: 160 | raise InvalidManifestPath() 161 | 162 | response = requests.get(urlunparse(config.path), stream=True) 163 | response.raise_for_status() # Check for request errors 164 | 165 | # Check for compression on the file. If compressed, store it in a buffer 166 | # and decompress it. 167 | if ( 168 | config.path.path.endswith(".gz") 169 | or response.headers.get("Content-Encoding") == "gzip" 170 | ): 171 | with gzip.GzipFile(fileobj=BytesIO(response.content)) as gz_file: 172 | return json.load(gz_file) 173 | 174 | return response.json() 175 | 176 | @staticmethod 177 | def load_from_dbt_cloud(config: DbtCloudReferenceConfig) -> Dict: 178 | """Load a manifest dictionary from dbt Cloud.""" 179 | client = DbtCloud( 180 | account_id=config.account_id, api_endpoint=config.api_endpoint 181 | ) 182 | 183 | return client.get_models(config.job_id, step=config.step) 184 | 185 | @staticmethod 186 | def load_from_gcs(config: GCSReferenceConfig) -> Dict: 187 | """Load a manifest dictionary from a GCS bucket.""" 188 | gcs_client = GCSClient( 189 | project_id=config.project_id, 190 | bucket_name=config.bucket_name, 191 | object_name=config.object_name, 192 | credentials=config.credentials, 193 | ) 194 | 195 | return gcs_client.load_manifest() 196 | 197 | @staticmethod 198 | def load_from_s3(config: S3ReferenceConfig) -> Dict: 199 | """Load a manifest dictionary from an S3-compatible bucket.""" 200 | gcs_client = S3Client( 201 | bucket_name=config.bucket_name, 202 | object_name=config.object_name, 203 | ) 204 | 205 | return gcs_client.load_manifest() 206 | 207 | @staticmethod 208 | def load_from_azure(config: AzureReferenceConfig) -> Dict: 209 | """Load a manifest dictionary from Azure storage.""" 210 | azure_client = AzureClient( 211 | container_name=config.container_name, 212 | object_name=config.object_name, 213 | account_name=config.account_name, 214 | ) 215 | 216 | return azure_client.load_manifest() 217 | 218 | @staticmethod 219 | def load_from_snowflake(config: SnowflakeReferenceConfig) -> Dict: 220 | """Load a manifest dictionary from Snowflake stage.""" 221 | snowflake_client = SnowflakeClient( 222 | stage=config.stage, stage_path=config.stage_path 223 | ) 224 | 225 | return snowflake_client.load_manifest() 226 | 227 | def load(self, manifest_reference: ManifestReference) -> Dict: 228 | """Load a 
manifest dictionary based on a ManifestReference input."""
229 |
230 |         if manifest_reference.type not in self.loading_functions:
231 |             raise LoomConfigurationError(
232 |                 f"The manifest reference provided for {manifest_reference.name} does "
233 |                 "not have a valid type."
234 |             )
235 |
236 |         try:
237 |             manifest = self.loading_functions[manifest_reference.type](
238 |                 manifest_reference.config
239 |             )
240 |         except LoomConfigurationError:
241 |             if getattr(manifest_reference, "optional", False):
242 |                 return None
243 |             raise
244 |
245 |         return manifest
-------------------------------------------------------------------------------- /dbt_loom/shims.py: --------------------------------------------------------------------------------
1 | from typing import Mapping, Optional
2 | from dbt.contracts.graph.nodes import GraphMemberNode, ModelNode
3 | from dbt.contracts.graph.manifest import MaybeNonSource
4 |
5 | try:
6 |     from dbt.artifacts.resources.types import NodeType, AccessType
7 | except ModuleNotFoundError:
8 |     from dbt.node_types import NodeType, AccessType  # type: ignore
9 |
10 |
11 | def is_invalid_protected_ref(
12 |     self,
13 |     node: GraphMemberNode,
14 |     target_model: MaybeNonSource,
15 |     dependencies: Optional[Mapping],
16 | ) -> bool:
17 |     dependencies = dependencies or {}
18 |     if not isinstance(target_model, ModelNode):
19 |         return False
20 |
21 |     is_protected_ref = (
22 |         target_model.access == AccessType.Protected
23 |         # don't raise this reference error for ad hoc 'preview' queries
24 |         and node.resource_type != NodeType.SqlOperation
25 |         and node.resource_type != NodeType.RPCCall  # TODO: rm
26 |     )
27 |     target_dependency = dependencies.get(target_model.package_name)
28 |     restrict_package_access = (
29 |         target_dependency.restrict_access if target_dependency else False
30 |     )
31 |
32 |     return is_protected_ref and (
33 |         node.package_name != target_model.package_name and restrict_package_access
34 |     )
35 |
36 |
37 | def is_invalid_private_ref(
38 |     self,
39 |     node: GraphMemberNode,
40 |     target_model: MaybeNonSource,
41 |     dependencies: Optional[Mapping],
42 | ) -> bool:
43 |     dependencies = dependencies or {}
44 |     if not isinstance(target_model, ModelNode):
45 |         return False
46 |
47 |     is_private_ref = (
48 |         target_model.access == AccessType.Private
49 |         # don't raise this reference error for ad hoc 'preview' queries
50 |         and node.resource_type != NodeType.SqlOperation
51 |         and node.resource_type != NodeType.RPCCall  # TODO: rm
52 |     )
53 |     target_dependency = dependencies.get(target_model.package_name)
54 |     restrict_package_access = (
55 |         target_dependency.restrict_access if target_dependency else False
56 |     )
57 |
58 |     return is_private_ref and (
59 |         # Invalid reference because the group does not match
60 |         (hasattr(node, "group") and node.group and node.group != target_model.group)  # type: ignore
61 |         # Or, invalid because these are different namespaces (project/package) and restrict-access is enforced
62 |         or (node.package_name != target_model.package_name and restrict_package_access)
63 |     )
-------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: --------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | The dbt-loom community exists to provide a free, unencumbered, and
6 | vendor-agnostic means of enabling multi-project deployments of dbt-core.
-------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: --------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | The dbt-loom community exists to provide a free, unencumbered, and
6 | vendor-agnostic means of enabling multi-project deployments of dbt-core.
7 | We accept that for this project to be successful, we must create a
8 | community that is open, curious, and accepting of each other's differences.
9 | 
10 | We as members, contributors, and leaders pledge to make participation in our
11 | community a harassment-free experience for everyone, regardless of age, body
12 | size, visible or invisible disability, ethnicity, sex characteristics, gender
13 | identity and expression, level of experience, education, socio-economic status,
14 | nationality, personal appearance, race, religion, sexual identity
15 | and orientation, or employer.
16 | 
17 | We pledge to act and interact in ways that contribute to an open, welcoming,
18 | diverse, inclusive, and healthy community.
19 | 
20 | ## Our Standards
21 | 
22 | Examples of behavior that contributes to a positive environment for our
23 | community include:
24 | 
25 | - Demonstrating empathy and kindness toward other people
26 | - Being respectful of differing opinions, viewpoints, and experiences
27 | - Giving and gracefully accepting constructive feedback
28 | - Accepting responsibility and apologizing to those affected by our mistakes,
29 |   and learning from the experience
30 | - Focusing on what is best not just for us as individuals or for specific
31 |   corporate interests, but for the overall community
32 | 
33 | Examples of unacceptable behavior include:
34 | 
35 | - The use of sexualized language or imagery, and sexual attention or
36 |   advances of any kind
37 | - Trolling, insulting or derogatory comments, and personal or political attacks
38 | - Public or private harassment
39 | - Publishing others' private information, such as a physical or email
40 |   address, without their explicit permission
41 | - Other conduct which could reasonably be considered inappropriate in a
42 |   professional setting
43 | 
44 | ## Enforcement Responsibilities
45 | 
46 | Community leaders are responsible for clarifying and enforcing our standards of
47 | acceptable behavior and will take appropriate and fair corrective action in
48 | response to any behavior that they deem inappropriate, threatening, offensive,
49 | or harmful.
50 | 
51 | Community leaders have the right and responsibility to remove, edit, or reject
52 | comments, commits, code, wiki edits, issues, and other contributions that are
53 | not aligned to this Code of Conduct, and will communicate reasons for moderation
54 | decisions when appropriate.
55 | 
56 | ## Scope
57 | 
58 | This Code of Conduct applies within all community spaces, and also applies when
59 | an individual is officially representing the community in public spaces.
60 | Examples of representing our community include using an official email address,
61 | posting via an official social media account, or acting as an appointed
62 | representative at an online or offline event.
63 | 
64 | ## Enforcement
65 | 
66 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
67 | reported to the community leaders responsible for enforcement at
68 | yager@nicholasyager.com. All complaints will be reviewed and investigated
69 | promptly and fairly.
70 | 
71 | All community leaders are obligated to respect the privacy and security of the
72 | reporter of any incident.
73 | 
74 | ## Enforcement Guidelines
75 | 
76 | Community leaders will follow these Community Impact Guidelines in determining
77 | the consequences for any action they deem in violation of this Code of Conduct:
78 | 
79 | ### 1. 
Correction 80 | 81 | **Community Impact**: Use of inappropriate language or other behavior deemed 82 | unprofessional or unwelcome in the community. 83 | 84 | **Consequence**: A private, written warning from community leaders, providing 85 | clarity around the nature of the violation and an explanation of why the 86 | behavior was inappropriate. A public apology may be requested. 87 | 88 | ### 2. Warning 89 | 90 | **Community Impact**: A violation through a single incident or series 91 | of actions. 92 | 93 | **Consequence**: A warning with consequences for continued behavior. No 94 | interaction with the people involved, including unsolicited interaction with 95 | those enforcing the Code of Conduct, for a specified period of time. This 96 | includes avoiding interactions in community spaces as well as external channels 97 | like social media. Violating these terms may lead to a temporary or 98 | permanent ban. 99 | 100 | ### 3. Temporary Ban 101 | 102 | **Community Impact**: A serious violation of community standards, including 103 | sustained inappropriate behavior. 104 | 105 | **Consequence**: A temporary ban from any sort of interaction or public 106 | communication with the community for a specified period of time. No public or 107 | private interaction with the people involved, including unsolicited interaction 108 | with those enforcing the Code of Conduct, is allowed during this period. 109 | Violating these terms may lead to a permanent ban. 110 | 111 | ### 4. Permanent Ban 112 | 113 | **Community Impact**: Demonstrating a pattern of violation of community 114 | standards, including sustained inappropriate behavior, harassment of an 115 | individual, or aggression toward or disparagement of classes of individuals. 116 | 117 | **Consequence**: A permanent ban from any sort of public interaction within 118 | the community. 119 | 120 | ## Attribution 121 | 122 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 123 | version 2.0, available at 124 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 125 | 126 | Community Impact Guidelines were inspired by 127 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 128 | 129 | For answers to common questions about this code of conduct, see the FAQ at 130 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 131 | at [https://www.contributor-covenant.org/translations][translations]. 132 | 133 | [homepage]: https://www.contributor-covenant.org 134 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 135 | [Mozilla CoC]: https://github.com/mozilla/diversity 136 | [FAQ]: https://www.contributor-covenant.org/faq 137 | [translations]: https://www.contributor-covenant.org/translations 138 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to dbt-loom 2 | 3 | Thank you for taking the time to contribute! 🎉💃 4 | 5 | The following is a set of guidelines for contributing to dbt-loom. These are 6 | mostly guidelines, not rules. Use your best judgment, and feel free to propose 7 | changes to this document in a pull request. 8 | 9 | ## How Can I Contribute? 10 | 11 | ### Reporting Bugs 12 | 13 | If you find a bug, please report it by opening an issue on GitHub. Make sure to 14 | include: 15 | 16 | - A clear and descriptive title. 17 | - Steps to reproduce the problem. 
18 | - Expected behavior.
19 | - Actual behavior.
20 | - Any relevant logs or screenshots.
21 | 
22 | ### Suggesting Enhancements
23 | 
24 | If you have an idea to enhance dbt-loom, we'd love to hear about it! Please
25 | open an issue on GitHub and include:
26 | 
27 | - A clear and descriptive title.
28 | - A detailed description of the proposed enhancement.
29 | - Any relevant use cases or examples.
30 | 
31 | ### Pull Requests
32 | 
33 | When you're ready to start working on an issue, fork the repository and create
34 | a new branch for your work. Follow these steps:
35 | 
36 | 1. Fork the repository and clone your fork.
37 | 2. Create a new branch: `git checkout -b feature/my-feature-branch`.
38 | 3. Make your changes and commit them: `git commit -m 'Add some feature'`.
39 | 4. Push to the branch: `git push origin feature/my-feature-branch`.
40 | 5. Open a pull request.
41 | 
42 | After creating the pull request, the PR will automatically notify the
43 | maintainers, and they will be able to trigger CI checks for your change.
44 | 
45 | ### Code Style
46 | 
47 | - Follow the existing code style.
48 | - Ensure your code passes all tests, including mypy.
49 | - Write tests for your code if applicable.
50 | 
51 | ### Running Tests
52 | 
53 | Make sure all tests pass before submitting a pull request. You can run the
54 | tests with:
55 | 
56 | ```console
57 | pytest tests/
58 | ```
59 | 
60 | ### Documentation
61 | 
62 | Contributions to documentation are always welcome. If you see something that can be improved or needs clarification, feel free to make changes.
63 | 
64 | ## Code of Conduct
65 | 
66 | This project adheres to the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md).
67 | By participating, you are expected to uphold this code.
68 | 
69 | ## Getting Help
70 | 
71 | If you need help or have any questions, feel free to open an issue on GitHub.
72 | 
73 | Thank you for contributing!
74 | 
-------------------------------------------------------------------------------- /docs/advanced-configuration.md: --------------------------------------------------------------------------------
1 | # Advanced Configuration
2 | 
3 | `dbt-loom` also has a couple of advanced configuration options for power users.
4 | 
5 | ## Using environment variables in the `dbt-loom` config
6 | 
7 | You can easily incorporate your own environment variables into the config file. This allows for dynamic configuration values that can change based on the environment. To specify an environment variable in the `dbt-loom` config file, use one of the following formats:
8 | 
9 | `${ENV_VAR}` or `$ENV_VAR`
10 | 
11 | ### Example:
12 | 
13 | ```yaml
14 | manifests:
15 |   - name: revenue
16 |     type: gcs
17 |     config:
18 |       project_id: ${GCP_PROJECT}
19 |       bucket_name: ${GCP_BUCKET}
20 |       object_name: ${MANIFEST_PATH}
21 | ```
22 | 
23 | ## Exclude nested packages
24 | 
25 | In some circumstances, like running `dbt-project-evaluator`, you may not want a
26 | given package in an upstream project to be imported into a downstream project.
27 | You can prevent a downstream project from injecting assets from specific packages
28 | by adding each package's name to the `excluded_packages` list for that manifest.
29 | 
30 | ```yaml
31 | manifests:
32 |   - name: revenue
33 |     type: file
34 |     config:
35 |       path: ../revenue/target/manifest.json
36 |     excluded_packages:
37 |       # Provide the string name of the package to exclude during injection.
38 |       - dbt_project_evaluator
39 | ```
40 | 
41 | ## Gzipped files
42 | 
43 | `dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed
44 | with `.gz`.
45 | 
46 | ```yaml
47 | manifests:
48 |   - name: revenue
49 |     type: s3
50 |     config:
51 |       bucket_name: example_bucket_name
52 |       object_name: manifest.json.gz
53 | ```
54 | 
55 | ## Enabling Telemetry
56 | 
57 | By default, the `dbt-loom` plugin blocks outbound telemetry that reports on
58 | the use of this plugin. This is a privacy-preserving measure for `dbt-loom`
59 | users that does not impact the function of dbt-core and does not impede
60 | dbt-core development in any way. If you _want_ this telemetry to be sent, you
61 | can re-enable this behavior by setting the `enable_telemetry` property
62 | in the `dbt_loom.config.yml` file.
63 | 
64 | ```yaml
65 | enable_telemetry: true
66 | manifests: ...
67 | ```
68 | 
-------------------------------------------------------------------------------- /docs/getting-started.md: --------------------------------------------------------------------------------
1 | # Getting Started
2 | 
3 | To begin, install the `dbt-loom` python package.
4 | 
5 | ```console
6 | pip install dbt-loom
7 | ```
8 | 
9 | Next, create a `dbt-loom` configuration file. This configuration file provides the paths for your
10 | upstream projects' manifest files.
11 | 
12 | ```yaml
13 | manifests:
14 |   - name: project_name # This should match the project's real name
15 |     type: file
16 |     config:
17 |       # A path to your manifest. This can be either a local path, or a remote
18 |       # path accessible via http(s).
19 |       path: path/to/manifest.json
20 | ```
21 | 
22 | By default, `dbt-loom` will look for `dbt_loom.config.yml` in your working directory. You can also set the
23 | `DBT_LOOM_CONFIG` environment variable to point at a configuration file elsewhere.
24 | 
25 | ## Using dbt Cloud as an artifact source
26 | 
27 | You can use dbt-loom to fetch model definitions from dbt Cloud by setting up a `dbt_cloud` manifest in your `dbt-loom` config, and setting the `DBT_CLOUD_API_TOKEN` environment variable in your execution environment.
28 | 
29 | ```yaml
30 | manifests:
31 |   - name: project_name
32 |     type: dbt_cloud
33 |     config:
34 |       account_id:
35 | 
36 |       # Job ID pertains to the job that you'd like to fetch artifacts from.
37 |       job_id:
38 | 
39 |       api_endpoint:
40 |       # dbt Cloud has multiple regions with different URLs. Update this to
41 |       # your appropriate dbt cloud endpoint.
42 | 
43 |       step:
44 |       # If your job generates multiple artifacts, you can set the step from
45 |       # which to fetch artifacts. Defaults to the last step.
46 | ```
47 | 
48 | ## Using an S3-compatible object store as an artifact source
49 | 
50 | You can use dbt-loom to fetch manifest files from S3-compatible object stores
51 | by setting up an `s3` manifest in your `dbt-loom` config. Please note that this
52 | approach supports all standard boto3-compatible environment variables and authentication mechanisms; see the [boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#environment-variables) for more details.
53 | 
54 | ```yaml
55 | manifests:
56 |   - name: project_name
57 |     type: s3
58 |     config:
59 |       bucket_name:
60 |       # The name of the bucket where your manifest is stored.
61 | 
62 |       object_name:
63 |       # The object name of your manifest file.
64 | ```
65 | 
66 | ## Using GCS as an artifact source
67 | 
68 | You can use dbt-loom to fetch manifest files from Google Cloud Storage by setting up a `gcs` manifest in your `dbt-loom` config.
69 | 
70 | ```yaml
71 | manifests:
72 |   - name: project_name
73 |     type: gcs
74 |     config:
75 |       project_id:
76 |       # The alphanumeric ID of the GCP project that contains your target bucket.
77 | 
78 |       bucket_name:
79 |       # The name of the bucket where your manifest is stored.
80 | 
81 |       object_name:
82 |       # The object name of your manifest file.
83 | 
84 |       credentials:
85 |       # The OAuth2 Credentials to use. If not passed, falls back to the default inferred from the environment.
86 | ```
87 | 
88 | ## Using Azure Storage as an artifact source
89 | 
90 | You can use dbt-loom to fetch manifest files from Azure Storage
91 | by setting up an `azure` manifest in your `dbt-loom` config. The `azure` type implements
92 | the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python)
93 | class, supporting all environment variables and authentication mechanisms.
94 | Alternatively, set the `AZURE_STORAGE_CONNECTION_STRING` environment variable to
95 | authenticate via a connection string.
96 | 
97 | ```yaml
98 | manifests:
99 |   - name: project_name
100 |     type: azure
101 |     config:
102 |       account_name: # The name of your Azure Storage account
103 |       container_name: # The name of your Azure Storage container
104 |       object_name: # The object name of your manifest file.
105 | ```
106 | 
107 | ## Using Snowflake Stage as an artifact source
108 | 
109 | You can use dbt-loom to fetch manifest files from Snowflake Stage by setting up a `snowflake` manifest in your `dbt-loom` config. Please note that this only
110 | works for dbt-core versions 1.8.0 and newer.
111 | 
112 | ```yaml
113 | manifests:
114 |   - name: project_name
115 |     type: snowflake
116 |     config:
117 |       stage: stage_name # Stage name, can include Database/Schema
118 |       stage_path: path/to/dbt/manifest.json # Path to manifest file in the stage
119 | ```
120 | 
-------------------------------------------------------------------------------- /docs/index.md: --------------------------------------------------------------------------------
1 | # dbt-loom
2 | 
3 | `dbt-loom` is a dbt Core plugin that weaves together multi-project deployments. It works by fetching public model definitions from your dbt artifacts, and injecting those models into your dbt project.
4 | 
5 | ```mermaid
6 | flowchart LR
7 | 
8 |     subgraph TOP[Your Infrastructure]
9 |         direction TB
10 |         dbt_runtime[dbt Core]
11 |         proprietary_plugin[Open Source Metadata Plugin]
12 | 
13 |         files[Local and Remote Files]
14 |         object_storage[Object Storage]
15 |         discovery_api[dbt Cloud APIs]
16 | 
17 |         discovery_api --> proprietary_plugin
18 |         files --> proprietary_plugin
19 |         object_storage --> proprietary_plugin
20 |         proprietary_plugin --> dbt_runtime
21 |     end
22 | 
23 |     Project --> TOP --> Warehouse
24 | ```
25 | 
26 | dbt-loom currently supports obtaining model definitions from:
27 | 
28 | - Local manifest files
29 | - Remote manifest files via http(s)
30 | - dbt Cloud
31 | - GCS
32 | - S3-compatible object storage services
33 | - Azure Storage
34 | - Snowflake Stage
35 | 
36 | ## How does it work?
37 | 
38 | As of dbt-core 1.6.0-b8, there exists a `dbtPlugin` class which defines functions that can
39 | be called by dbt-core's `PluginManager`. During different parts of the dbt-core lifecycle (such as graph linking and
40 | manifest writing), the `PluginManager` will be called and all plugins registered with the appropriate hook will be executed.
41 | 
42 | dbt-loom implements a `get_nodes` hook, and uses a configuration file to parse manifests, identify public models, and
43 | inject those public models when called by `dbt-core`.
44 | 
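For illustration, here is a minimal sketch of such a plugin. It assumes dbt-core's `dbt.plugins` interfaces (`dbtPlugin`, `dbt_hook`, `PluginNodes`, and `ModelNodeArgs`); the `ExampleLoomPlugin` class and the `UPSTREAM_PUBLIC_MODELS` list are hypothetical stand-ins for what dbt-loom actually derives from the manifests named in its configuration file.

```python
from dbt.plugins.manager import dbt_hook, dbtPlugin
from dbt.plugins.manifest import ModelNodeArgs, PluginNodes

# Hypothetical stand-in for the public models parsed out of an upstream
# project's manifest.json.
UPSTREAM_PUBLIC_MODELS = [
    {
        "name": "orders",
        "package_name": "revenue",
        "identifier": "orders",
        "schema": "analytics",
    },
]


class ExampleLoomPlugin(dbtPlugin):
    """Sketch: inject upstream public models into the current project."""

    @dbt_hook
    def get_nodes(self) -> PluginNodes:
        nodes = PluginNodes()
        for model in UPSTREAM_PUBLIC_MODELS:
            nodes.add_model(ModelNodeArgs(**model))
        return nodes
```

When the hook fires, the returned `PluginNodes` are merged into the project's manifest, which is what allows a downstream `ref('revenue', 'orders')` to resolve.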
45 | ## Known Caveats
46 | 
47 | Cross-project dependencies are a relatively new development, and dbt-core plugins
48 | are still in beta. As such, there are a number of caveats to be aware of when using
49 | this tool.
50 | 
51 | 1. dbt plugins are only supported in dbt-core version 1.6.0-b8 and newer. This means you must be using a dbt adapter
52 |    compatible with this version.
53 | 2. `PluginNodeArgs` are not fully-realized dbt `ManifestNode`s, so documentation generated by `dbt docs generate` may
54 |    be sparse when viewing injected models.
55 | 
-------------------------------------------------------------------------------- /mkdocs.yml: --------------------------------------------------------------------------------
1 | site_name: dbt_loom
2 | 
3 | theme:
4 |   palette:
5 |     # Palette toggle for light mode
6 |     - media: "(prefers-color-scheme: light)"
7 |       scheme: default
8 |       primary: custom
9 |       accent: custom
10 |       toggle:
11 |         icon: material/brightness-7
12 |         name: Switch to dark mode
13 | 
14 |     # Palette toggle for dark mode
15 |     - media: "(prefers-color-scheme: dark)"
16 |       scheme: slate
17 |       primary: custom
18 |       accent: custom
19 |       toggle:
20 |         icon: material/brightness-4
21 |         name: Switch to light mode
22 | 
23 |   # primary: black
24 |   name: material
25 |   features:
26 |     - navigation.footer
27 |     - navigation.instant
28 |     - navigation.tracking
29 |     - content.action.edit
30 |     - toc.integrate # check feedback
31 | 
32 | extra:
33 |   version:
34 |     provider: mike
35 | 
36 | markdown_extensions:
37 |   - attr_list # needed to allow providing width
38 |   - md_in_html # to allow Markdown in details
39 |   - toc:
40 |       toc_depth: 3
41 |       permalink: "#"
42 |   - pymdownx.highlight:
43 |       anchor_linenums: true
44 |       line_spans: __span
45 |       pygments_lang_class: true
46 |   - pymdownx.inlinehilite
47 |   - pymdownx.snippets
48 |   - pymdownx.superfences:
49 |       custom_fences:
50 |         - name: mermaid
51 |           class: mermaid
52 |           format: !!python/name:pymdownx.superfences.fence_code_format
53 |   - pymdownx.details # allow collapsible blocks
54 |   - admonition
55 | 
56 | repo_url: https://github.com/nicholasyager/dbt-loom
57 | repo_name: nicholasyager/dbt-loom
58 | edit_uri: edit/main/docs/
59 | 
60 | nav:
61 |   - Home: index.md
62 |   - Getting started: getting-started.md
63 |   - Advanced configuration: advanced-configuration.md
64 | 
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "dbt-loom"
3 | version = "0.8.0"
4 | description = "A dbt-core plugin to import public nodes in multi-project deployments."
5 | authors = ["Nicholas Yager "] 6 | readme = "README.md" 7 | packages = [{ include = "dbt_loom" }] 8 | 9 | [tool.commitizen] 10 | version = "0.8.0" 11 | version_files = ["pyproject.toml:^version"] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.9,<4.0" 15 | dbt-core = ">=1.6.0,<1.10.0" 16 | requests = "^2.31.0" 17 | google-cloud-storage = "^2.13.0" 18 | boto3 = "^1.28.84" 19 | azure-storage-blob = "^12.19.0" 20 | azure-identity = "^1.15.0" 21 | types-pyyaml = "^6.0.12.12" 22 | types-networkx = "^3.2.1.20240313" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | ruff = "^0.3.0" 26 | pytest = "^7.4.0" 27 | isort = "^5.12.0" 28 | dbt-duckdb = ">=1.6.0,<1.10.0" 29 | duckdb = ">=0.8.0" 30 | pre-commit = "^3.6.0" 31 | mypy = "^1.8.0" 32 | 33 | [tool.poetry.extras] 34 | snowflake = ["dbt-snowflake"] 35 | 36 | [tool.poetry.group.docs.dependencies] 37 | mkdocs-material = "^9.5.45" 38 | mike = "^2.1.3" 39 | 40 | [tool.ruff] 41 | line-length = 88 42 | 43 | [tool.isort] 44 | force_grid_wrap = 0 # Resolve conflict with Black 45 | line_length = 88 # Comply with Ruff and Black 46 | 47 | [tool.pytest.ini_options] 48 | testpaths = ["tests"] 49 | 50 | [build-system] 51 | requires = ["poetry-core"] 52 | build-backend = "poetry.core.masonry.api" 53 | -------------------------------------------------------------------------------- /test_projects/customer_success/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | *.duckdb 6 | *.duckdb.wal 7 | reports/sources/*.csv 8 | .meltano 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /test_projects/customer_success/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: requirements-txt-fixer 9 | - repo: https://github.com/charliermarsh/ruff-pre-commit 10 | rev: v0.0.245 11 | hooks: 12 | - id: ruff 13 | args: [--fix, --exit-non-zero-on-fix] 14 | - repo: https://github.com/pre-commit/mirrors-eslint 15 | rev: v8.34.0 16 | hooks: 17 | - id: eslint 18 | - repo: https://github.com/sqlfluff/sqlfluff 19 | rev: "2.0.0a4" 20 | hooks: 21 | - id: sqlfluff-lint 22 | additional_dependencies: 23 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 24 | - id: sqlfluff-fix 25 | additional_dependencies: 26 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 27 | - repo: https://github.com/psf/black 28 | rev: "23.1.0" 29 | hooks: 30 | - id: black 31 | # - repo: https://github.com/pre-commit/mirrors-prettier 32 | # rev: "" # Use the sha or tag you want to point at 33 | # hooks: 34 | # - id: prettier 35 | -------------------------------------------------------------------------------- /test_projects/customer_success/.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | dialect = duckdb 3 | templater = dbt 4 | runaway_limit = 10 5 | max_line_length = 80 6 | indent_unit = space 7 | 8 | [sqlfluff:indentation] 9 | tab_space_size = 4 10 | 11 | [sqlfluff:layout:type:comma] 12 | spacing_before = touch 13 | line_position = trailing 14 | 15 | [sqlfluff:rules:capitalisation.keywords] 16 | capitalisation_policy = lower 17 | 18 | [sqlfluff:rules:aliasing.table] 19 | aliasing = explicit 20 | 21 | [sqlfluff:rules:aliasing.column] 22 | aliasing = 
explicit
23 | 
24 | [sqlfluff:rules:aliasing.expression]
25 | allow_scalar = False
26 | 
27 | [sqlfluff:rules:capitalisation.identifiers]
28 | extended_capitalisation_policy = lower
29 | 
30 | [sqlfluff:rules:capitalisation.functions]
31 | capitalisation_policy = lower
32 | 
33 | [sqlfluff:rules:capitalisation.literals]
34 | capitalisation_policy = lower
35 | 
36 | [sqlfluff:rules:ambiguous.column_references] # Number in group by
37 | group_by_and_order_by_style = implicit
38 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/.sqlfluffignore: --------------------------------------------------------------------------------
1 | reports
2 | target
3 | dbt_packages
4 | macros
5 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/README.md: --------------------------------------------------------------------------------
1 | # 🥪 The Jaffle Shop 🦘
2 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/dbt-labs/jaffle-shop-template?quickstart=1)
3 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/dbt-labs/jaffle-shop-template)
4 | 
5 | This is a template for creating a fully functional dbt project for teaching, learning, writing, demoing, or any other scenarios where you need a basic project with a synthesized jaffle shop business. We recommend beginners use the following steps to open this project right here on GitHub in a Codespace. If you're a little more experienced with devcontainers and want to go faster 🏎️, you can use the Gitpod link above for a quicker startup and deeper feature set.
6 | 
7 | ## How to use
8 | 
9 | ### 1. Click the big green 'Use this template' button and 'Create a new repository'.
10 | 
11 | ![Click use template](.github/static/use-template.gif)
12 | 
13 | This will create a new repository exactly like this one, and navigate you there. Make sure to execute the next instructions in that repo.
14 | 
15 | ### 2. Click 'Code', then 'Codespaces', then 'Create codespace on main'.
16 | 
17 | ![Create codespace on main](.github/static/open-codespace.gif)
18 | 
19 | This will create a new `codespace`, a sandboxed devcontainer with everything you need for a dbt project. Once the codespace is finished setting up, you'll be ready to run a `dbt build`.
20 | 
21 | ### 3. Make sure to wait until the codespace is finished setting up.
22 | 
23 | ![Codespaces setup screen at postCreateCommand](.github/static/codespaces-setup-screen.png)
24 | 
25 | After the container is built and connected to, VSCode will run a few clean up commands and then a `postCreateCommand`, a set of commands run after the container is set up. This is where we install our dependencies, such as dbt, the duckdb adapter, and other necessities, as well as run `dbt deps` to install the dbt packages we want to use. That screen will look something like the above. When it's completed it will close and leave you in a fresh terminal prompt. From there you're ready to do some analytics engineering!
26 | 
27 | ## Additional included tools
28 | 
29 | This template includes two additional tools for the other parts of the stack to create a more realistic experience:
30 | 
31 | - BI reporting built with [Evidence](https://evidence.dev) - an open source, code-based BI tool to write reports with markdown and SQL.
32 | - EL with [Meltano](https://meltano.com/) - an open source tool that provides a CLI & version control for ELT pipelines.
33 | 
34 | ### Evidence
35 | 
36 | With Evidence you can:
37 | 
38 | - Version control your BI layer
39 | - Build reports in the same repo as your dbt project
40 | - Deploy your reports to a static site
41 | 
42 | #### Running Evidence
43 | 
44 | To run Evidence, use:
45 | 
46 | ```shell
47 | cd reports
48 | npm run dev
49 | ```
50 | 
51 | See the [Evidence CLI docs](https://docs.evidence.dev/cli) for more details.
52 | 
53 | You can make changes to the markdown pages in the `reports/pages` folder and see the reports update in the browser preview.
54 | 
55 | #### Learning More about Evidence
56 | 
57 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence)
58 | - [Project Home Page](https://www.evidence.dev)
59 | - [Github](https://github.com/evidence-dev/evidence)
60 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases)
61 | 
62 | ### Meltano
63 | 
64 | This project is preconfigured with Meltano, which can be used to extract and load raw data into DuckDB.
65 | 
66 | #### Run EL (Extract and Load) using Meltano
67 | 
68 | ```console
69 | meltano run tap-jaffle-shop target-duckdb
70 | ```
71 | 
72 | Optionally, you can modify extract parameters using environment variables. For instance, this modified version will extract five years of data instead of the default one year.
73 | 
74 | ```console
75 | export TAP_JAFFLE_SHOP_YEARS=5
76 | meltano run tap-jaffle-shop target-duckdb
77 | ```
78 | 
79 | You can also modify any tap or target config with the interactive `config` command:
80 | 
81 | ```console
82 | meltano config tap-jaffle-shop set --interactive
83 | meltano config target-duckdb set --interactive
84 | ```
85 | 
86 | ## Local development
87 | 
88 | This project is optimized for running in a container. If you'd like to use it locally outside of a container, you'll need to follow the instructions below.
89 | 
90 | 1. Create a python virtual environment and install the dependencies.
91 | 
92 | ```console
93 | python3 -m venv .venv
94 | source .venv/bin/activate
95 | pip install -r requirements.txt
96 | ```
97 | 
98 | 2. Install meltano with [pipx](https://pypa.github.io/pipx/installation/), and install meltano's dependencies.
99 | 
100 | ```console
101 | pipx install meltano
102 | meltano install
103 | ```
104 | 
105 | 3. Run the EL pipeline.
106 | 
107 | ```console
108 | meltano run el
109 | ```
110 | 
111 | 4. Install dbt dependencies and build the dbt project.
112 | 
113 | ```console
114 | dbt deps
115 | dbt build
116 | ```
117 | 
118 | 5. Install Evidence dependencies and run the Evidence server.
119 | 
120 | ```console
121 | cd reports
122 | npm install
123 | npm run dev
124 | ```
125 | 
126 | ## Contributing
127 | 
128 | We welcome issues and PRs requesting or adding new features. The package that generates the synthetic data, [`jafgen`](https://pypi.org/project/jafgen/), is also under active development, and will add more types of source data to model as we go along. If you have tests, descriptions, new models, metrics, materialization types, or techniques you use this repo to demonstrate, which you feel would make for a more expansive baseline experience, we encourage you to consider contributing them back in so that this project becomes an even better collective tool for exploring and learning dbt over time.
129 | -------------------------------------------------------------------------------- /test_projects/customer_success/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | tasks: 4 | deps: 5 | cmds: 6 | - python -m pip install --progress-bar off -r requirements.txt 7 | - dbt deps 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/analyses/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/dbt_loom.config.yml: -------------------------------------------------------------------------------- 1 | manifests: 2 | - name: potato 3 | type: file 4 | config: 5 | path: ../revenue/target/manifest.json 6 | excluded_packages: 7 | - dbt_project_evaluator 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "customer_success" 5 | version: "1.0.0" 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: "customer_success" 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | vars: 27 | truncate_timespan_to: "{{ current_timestamp() }}" 28 | 29 | # Configuring models 30 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 31 | 32 | # In this example config, we tell dbt to build all models in the example/ directory 33 | # as tables. These settings can be overridden in the individual model files 34 | # using the `{{ config(...) }}` macro. 
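# For example (hypothetical override), an individual model can opt out of the
# directory-level defaults declared below:
#
#   -- in models/marts/some_model.sql
#   {{ config(materialized='view') }}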
35 | 36 | models: 37 | customer_success: 38 | staging: 39 | +materialized: view 40 | marts: 41 | +materialized: table 42 | -------------------------------------------------------------------------------- /test_projects/customer_success/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/macros/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/macros/cents_to_dollars.sql: -------------------------------------------------------------------------------- 1 | {# A basic example for a project-wide macro to cast a column uniformly #} 2 | 3 | {% macro cents_to_dollars(column_name, precision=2) -%} 4 | ({{ column_name }} / 100)::numeric(16, {{ precision }}) 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /test_projects/customer_success/meltano.yml: -------------------------------------------------------------------------------- 1 | # Meltano Configuration File 2 | # 3 | # Sample usage: 4 | # > meltano run tap-jaffle-shop target-duckdb 5 | # 6 | # Or equivalently: 7 | # > meltano run el # Run the job named 'el' to extract and load data 8 | 9 | version: 1 10 | project_id: Jaffle Shop Template Project 11 | 12 | env: 13 | JAFFLE_DB_NAME: jaffle_shop 14 | JAFFLE_RAW_SCHEMA: jaffle_raw 15 | 16 | default_environment: dev 17 | environments: 18 | - name: dev 19 | 20 | plugins: 21 | extractors: 22 | - name: tap-jaffle-shop 23 | namespace: tap_jaffle_shop 24 | variant: meltanolabs 25 | pip_url: git+https://github.com/MeltanoLabs/tap-jaffle-shop.git@v0.3.0 26 | capabilities: 27 | - catalog 28 | - discover 29 | config: 30 | years: 2 31 | stream_name_prefix: ${JAFFLE_RAW_SCHEMA}-raw_ 32 | loaders: 33 | - name: target-postgres 34 | variant: datamill-co 35 | pip_url: git+https://github.com/datamill-co/target-postgres.git@v0.1.0 36 | config: 37 | host: 127.0.0.1 38 | user: postgres 39 | password: ${SNOWFLAKE_PASSWORD} 40 | default_target_schema: $JAFFLE_RAW_SCHEMA 41 | 42 | jobs: 43 | # Sample usage: `meltano run el` 44 | # Equivalent to: `meltano run tap-jaffle-shop target-duckdb` 45 | - name: el # Extract and load the raw data 46 | tasks: 47 | - tap-jaffle-shop target-postgres 48 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: customers 5 | description: Customer overview data mart, offering key details for each unique customer. One row per customer. 6 | columns: 7 | - name: customer_id 8 | description: The unique key of the orders mart. 9 | tests: 10 | - not_null 11 | - unique 12 | - name: customer_name 13 | description: Customers' full name. 14 | - name: count_lifetime_orders 15 | description: Total number of orders a customer has ever placed. 16 | - name: first_ordered_at 17 | description: The timestamp when a customer placed their first order. 18 | - name: last_ordered_at 19 | description: The timestamp of a customer's most recent order. 20 | - name: lifetime_spend_pretax 21 | description: The sum of all the pre-tax subtotals of every order a customer has placed. 22 | - name: lifetime_spend 23 | description: The sum of all the order totals (including tax) that a customer has ever placed. 
24 |       - name: customer_type
25 |         description: Options are 'new' or 'returning', indicating if a customer has ordered more than once or has only placed their first order to date.
26 |         tests:
27 |           - accepted_values:
28 |               values: ["new", "returning"]
29 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/customer_status_histories.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | 
4 | def model(dbt, session):
5 |     # length of time considered a churn (currently an unused placeholder)
6 |     churn_window = pd.Timedelta(days=2)  # noqa: F841
7 | 
8 |     dbt.config(enabled=False, materialized="table", packages=["pandas==1.5.2"])
9 | 
10 |     orders_relation = dbt.ref("orders")
11 | 
12 |     # converting a DuckDB Python Relation into a pandas DataFrame
13 |     orders_df = orders_relation.df()
14 | 
15 |     orders_df.sort_values(by="ordered_at", inplace=True)
16 |     orders_df["previous_order_at"] = orders_df.groupby("customer_id")[
17 |         "ordered_at"
18 |     ].shift(1)
19 |     orders_df["next_order_at"] = orders_df.groupby("customer_id")["ordered_at"].shift(
20 |         -1
21 |     )
22 |     return orders_df
23 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/marts/customers.sql: --------------------------------------------------------------------------------
1 | {{
2 |     config(
3 |         materialized='table'
4 |     )
5 | }}
6 | 
7 | with
8 | 
9 | customers as (
10 | 
11 |     select * from {{ ref('stg_customers') }}
12 | 
13 | ),
14 | 
15 | orders_mart as (
16 | 
17 |     select * from {{ ref('revenue', 'orders') }}
18 | 
19 | ),
20 | 
21 | integers as (
22 |     select * from {{ ref('revenue', 'integers') }}
23 | ),
24 | 
25 | order_summary as (
26 | 
27 |     select
28 |         customer_id,
29 | 
30 |         count(*) as count_lifetime_orders,
31 |         count(*) > 1 as is_repeat_buyer,
32 |         min(ordered_at) as first_ordered_at,
33 |         max(ordered_at) as last_ordered_at,
34 | 
35 |         sum(subtotal) as lifetime_spend_pretax,
36 |         sum(order_total) as lifetime_spend
37 | 
38 |     from orders_mart
39 |     group by 1
40 | 
41 | ),
42 | 
43 | joined as (
44 | 
45 |     select
46 |         customers.*,
47 |         order_summary.count_lifetime_orders,
48 |         order_summary.first_ordered_at,
49 |         order_summary.last_ordered_at,
50 |         order_summary.lifetime_spend_pretax,
51 |         order_summary.lifetime_spend,
52 | 
53 |         case
54 |             when order_summary.is_repeat_buyer then 'returning'
55 |             else 'new'
56 |         end as customer_type
57 | 
58 |     from customers
59 | 
60 |     left join order_summary
61 |         on customers.customer_id = order_summary.customer_id
62 | 
63 | )
64 | 
65 | select * from joined
66 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/__models.yml: --------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | models:
4 |   - name: stg_customers
5 |     description: Customer data with basic cleaning and transformation applied, one row per customer.
6 |     columns:
7 |       - name: customer_id
8 |         description: The unique key for each customer.
9 | tests: 10 | - not_null 11 | - unique 12 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: ecom 5 | schema: raw 6 | description: E-commerce data 7 | tables: 8 | - name: raw_customers 9 | meta: 10 | external_location: "read_csv('jaffle-data/raw_customers.csv', names=['id', 'name'],AUTO_DETECT=TRUE)" 11 | description: One record per person who has purchased one or more items 12 | -------------------------------------------------------------------------------- /test_projects/customer_success/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | source as ( 4 | 5 | select * from {{ source('ecom', 'raw_customers') }} 6 | 7 | ), 8 | 9 | renamed as ( 10 | 11 | select 12 | 13 | ---------- ids 14 | id as customer_id, 15 | 16 | ---------- properties 17 | name as customer_name 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /test_projects/customer_success/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | sha1_hash: efa9169fb1f1a1b2c967378c02b60e3d85ae464b 5 | -------------------------------------------------------------------------------- /test_projects/customer_success/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | -------------------------------------------------------------------------------- /test_projects/customer_success/profiles.yml: -------------------------------------------------------------------------------- 1 | customer_success: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ../database.db 6 | threads: 4 7 | target: dev 8 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/.evidence/customization/custom-formatting.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "customFormats": [] 4 | } -------------------------------------------------------------------------------- /test_projects/customer_success/reports/.gitignore: -------------------------------------------------------------------------------- 1 | .evidence/template 2 | .svelte-kit 3 | build 4 | node_modules 5 | .DS_Store 6 | *.*duckdb 7 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/README.md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop BI Reports 2 | 3 | This project uses [Evidence.dev](https://Evidence.dev) for BI reporting. 4 | 5 | ## Getting Started 6 | 7 | Run the BI server from your local workstation: 8 | 9 | ```shell 10 | cd reports 11 | npm run dev 12 | ``` 13 | 14 | This will launch the Evidence webserver in developer mode. 15 | 16 | ## Testing for breakages 17 | 18 | The following command can be used to confirm that reports and queries are still valid: 19 | 20 | ```console 21 | npm run build:strict 22 | ``` 23 | 24 | ## Updating to the latest version of Evidence 25 | 26 | 1. 
Check your version against the version number for the [latest release](https://github.com/evidence-dev/evidence/releases).
27 | 2. Run `npm install @evidence-dev/evidence@latest` to bump the version in `package.json` and automatically update dependencies in `package-lock.json`.
28 | 
29 | ## Learning More
30 | 
31 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence)
32 | - [Project Home Page](https://www.evidence.dev)
33 | - [Github](https://github.com/evidence-dev/evidence)
34 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases)
35 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/reports/package.json: --------------------------------------------------------------------------------
1 | {
2 |   "name": "jaffle-shop",
3 |   "version": "0.0.1",
4 |   "scripts": {
5 |     "build": "evidence build",
6 |     "build:strict": "evidence build:strict",
7 |     "dev": "evidence dev --host 0.0.0.0 --open /",
8 |     "test": "evidence build",
9 |     "help": "evidence --help"
10 |   },
11 |   "engines": {
12 |     "npm": ">=7.0.0",
13 |     "node": ">=16.14.0"
14 |   },
15 |   "type": "module",
16 |   "dependencies": {
17 |     "@evidence-dev/evidence": "15.0.1",
18 |     "@evidence-dev/preprocess": "2.2.0",
19 |     "@evidence-dev/components": "2.2.1"
20 |   },
21 |   "overrides": {
22 |     "jsonwebtoken": "9.0.0",
23 |     "trim@<0.0.3": ">0.0.3",
24 |     "sqlite3": "5.1.5"
25 |   }
26 | }
27 | 
-------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/analysis/seasonality-investigation.md: --------------------------------------------------------------------------------
1 | # Seasonality Investigation
2 | *Written by Melissa Cranston in September 2017*
3 | 
4 | *Analysis covers the time period of September 2016 to August 2017. All queries have been limited to that range.*
5 | 
6 | [Jump to conclusions & recommendations ↓](#conclusions)
7 | 
8 | ## Variations in Order Volume
9 | Plotting orders per day for the last 12 months reveals 3 things:
10 | - An unnaturally large jump in orders per day in March 2017 - this was driven by the new store opening in [Brooklyn](/stores/Brooklyn)
11 | - A repeating pattern of spikes which might be driven by different order volumes on specific days of the week
12 | - A drop in total orders per day around June 2017
13 | 
14 | ```orders_per_day
15 | select
16 |     date_trunc('day', ordered_at) as date,
17 |     count(*) as orders
18 | 
19 | from analytics.orders
20 | where ordered_at between '2016-09-01' and '2017-08-31'
21 | 
22 | group by 1
23 | order by 1
24 | ```
25 | 
26 | 
33 | 
34 | ## Day of Week
35 | We can calculate average orders by day of week to check if there are differences in order volume across days.
36 | 
37 | ```orders_by_weekday
38 | select
39 |     date_part('dayofweek', date) as day_of_week_num,
40 |     dayname(date) as day_of_week,
41 |     avg(orders) as avg_orders
42 | from ${orders_per_day}
43 | group by 1, 2
44 | order by day_of_week_num
45 | ```
46 | 
47 | 
55 | 
56 | This reveals that weekdays generate significantly higher order volume than weekends. It also shows that orders are fairly consistent across individual days on weekdays (202-209 orders/day) and weekends (~50 orders/day).
57 | 
58 | ## Hour of Day
59 | Now we'll break down orders by hour of day to see if there are patterns within days. Given the differences we just found between weekday and weekend volumes, we should split the results by those day types.
We can use a loop for this. 60 | 61 | ```orders_hour_of_day 62 | with 63 | orders_by_hour as ( 64 | select 65 | date_part('hour', ordered_at) as hour_of_day, 66 | if(dayname(ordered_at) in ('Sunday', 'Saturday'), 'Weekend', 'Weekday') as day_type, 67 | count(*)::float as orders, 68 | count(distinct date_trunc('day', ordered_at)) as days 69 | from analytics.orders 70 | where ordered_at between '2016-09-01' and '2017-08-31' 71 | group by 1, 2 72 | order by hour_of_day 73 | ) 74 | 75 | select 76 | *, 77 | orders / days as orders_per_hour 78 | from orders_by_hour 79 | ``` 80 | 81 | {#each ['Weekday', 'Weekend'] as day_type} 82 | 83 | d.day_type === day_type)} 85 | x=hour_of_day 86 | y=orders_per_hour 87 | yAxisTitle=true 88 | xAxisTitle=true 89 | yMax=60 90 | title="{day_type} - Orders by Hour of Day" 91 | /> 92 | 93 | {/each} 94 | 95 | We see a significant peak in order volume between 7 and 9am on weekdays. There is also a slight increase in volume around lunch times (12-2pm) across all days of the week. 96 | 97 | ## Dayparts 98 | Based on the volumes shown above, we can break down our dayparts as: 99 | - Breakfast: 7-9am 100 | - Late Morning: 9am-12pm 101 | - Lunch: 12-2pm 102 | - Late Afternoon: 2-5pm 103 | 104 | In future analyses, these timeframes should be lined up with any existing operational timeframes (e.g., breakfast, lunch service windows). 105 | 106 | ```dayparts 107 | with 108 | orders_add_daypart as ( 109 | select 110 | *, 111 | case 112 | when hour_of_day between 7 and 8 then 'Breakfast' 113 | when hour_of_day between 9 and 11 then 'Late Morning' 114 | when hour_of_day between 12 and 14 then 'Lunch' 115 | when hour_of_day between 15 and 24 then 'Late Afternoon' 116 | end as daypart 117 | from ${orders_hour_of_day} 118 | ), 119 | 120 | orders_by_daypart as ( 121 | select 122 | daypart, 123 | day_type, 124 | sum(orders) / sum(days) as orders_per_hour, 125 | sum(orders) as orders 126 | from orders_add_daypart 127 | group by daypart, day_type 128 | ) 129 | 130 | select 131 | *, 132 | orders / sum(orders) over () as orders_pct1 133 | from orders_by_daypart 134 | ``` 135 | 136 | 145 | 146 | Almost half of all orders are generated from breakfast on weekdays. This might be driven by orders from customers who are on their way to work - a follow-up analysis on customer purchasing behaviour should be completed to investigate this. 
147 | 148 | ## Conclusions 149 | - Weekdays generate significantly more orders than weekend days (~4x more orders on an average weekday compared to an average weekend day) 150 | - Early mornings (7-9am) on weekdays generate almost half of all orders for the company 151 | - There was a drop in orders in June 2017 - this has not been covered in this analysis, but should be investigated 152 | 153 | ### Recommended Follow-on Analyses 154 | - Investigate drop in orders in June 2017 155 | - Study customer purchasing behaviour, especially during weekday early mornings 156 | - Extend this analysis with a longer timeframe to investigate seasonality throughout the calendar year 157 | 158 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/customers/[customer].md: -------------------------------------------------------------------------------- 1 | # {$page.params.customer}'s Customer Profile 2 | 3 | ```customers 4 | select 5 | *, 6 | first_ordered_at as first_order_longdate, 7 | last_ordered_at as last_order_longdate, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | ``` 12 | 13 | {$page.params.customer} has been a customer since d.customer_name === $page.params.customer)} column=first_order_longdate/>, with their most recent order occurring on d.customer_name === $page.params.customer)} column=last_order_longdate/>. 14 | 15 | ### Key stats: 16 | - d.customer_name === $page.params.customer)} column=count_lifetime_orders/> lifetime orders 17 | - d.customer_name === $page.params.customer)} column=lifetime_spend_usd/> in lifetime spend 18 | - d.customer_name === $page.params.customer)} column=average_order_value_usd/> average order value 19 | 20 | ```monthly_purchases 21 | select 22 | date_trunc('month', a.ordered_at) as month, 23 | b.customer_name, 24 | sum(a.order_total) as purchases_usd 25 | from analytics.orders a 26 | left join analytics.customers b 27 | on a.customer_id = b.customer_id 28 | group by month, customer_name 29 | order by month asc 30 | ``` 31 | 32 | d.customer_name === $page.params.customer)} 34 | x=month 35 | y=purchases_usd 36 | title="Purchases per Month by {$page.params.customer}" 37 | /> -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/customers/index.md: -------------------------------------------------------------------------------- 1 | # Customers 2 | 3 | ```customers 4 | select 5 | customer_name, 6 | concat('/customers/', customer_name) as customer_link, 7 | count_lifetime_orders as lifetime_orders, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | order by lifetime_spend_usd desc 12 | ``` 13 | 14 | Click a row to see the report for that customer: 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Jaffle Shop 🥪 2 | 3 | ```monthly_stats 4 | with 5 | monthly_stats as ( 6 | select 7 | date_trunc('month', ordered_at) as month, 8 | sum(order_total) as revenue_usd1k, 9 | count(*)::float as orders, 10 | count(distinct customer_id)::float as customers 11 | 12 | from analytics.orders 13 | group by month 14 | order by month desc 15 | 
) 16 | 17 | select 18 | *, 19 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 20 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 21 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 22 | monthname(month) as month_name 23 | from monthly_stats 24 | ``` 25 | 26 | 33 | 34 | 41 | 42 | Jaffle Shop locations served happy customers in . This was a change of from . 43 | 44 | ## Store Openings 45 | 46 | ```store_opening 47 | with 48 | most_recent_open as ( 49 | select 50 | location_name as opened_store, 51 | min(ordered_at) as opened_date_mmmyyyy, 52 | sum(order_total) as opened_revenue_usd 53 | from analytics.orders 54 | group by location_name 55 | order by opened_date_mmmyyyy desc 56 | limit 1 57 | ), 58 | 59 | company_total as ( 60 | select 61 | sum(order_total) as company_revenue_usd, 62 | from analytics.orders 63 | cross join most_recent_open 64 | where ordered_at >= opened_date_mmmyyyy 65 | ) 66 | 67 | select 68 | *, 69 | opened_revenue_usd / company_revenue_usd as revenue_pct 70 | from most_recent_open 71 | cross join company_total 72 | ``` 73 | 74 | The most recent Jaffle Shop store opening was in . Since opening, has contributed of total company sales. 75 | 76 | ```orders_per_week 77 | select 78 | date_trunc('week', ordered_at) as week, 79 | location_name, 80 | count(*) as orders, 81 | sum(order_total) as revenue_usd 82 | 83 | from analytics.orders 84 | 85 | group by 1,2 86 | order by 1 87 | ``` 88 | 89 | 98 | 99 | ```revenue_per_city 100 | select 101 | location_name as city, 102 | concat('/stores/', location_name) as store_link, 103 | count(distinct customer_id) as customers, 104 | count(*) as orders, 105 | sum(order_total) as revenue_usd 106 | 107 | from analytics.orders 108 | 109 | group by 1, 2 110 | ``` 111 | 112 | ## Reports on Individual Stores 113 | Click a row to see the report for that store: 114 | 115 | 116 | ## Seasonality 117 | See [Seasonality Investigation](/analysis/seasonality-investigation) for more information. 118 | 119 | ## Customers 120 | To see individual customer purchase history, see [Customers](/customers) 121 | 122 | ### Customer Cohorts 123 | Average order values are tracked using monthly cohorts, which are created by truncating `first_order_date` to month. 
124 | 125 | ```customers_with_cohort 126 | select 127 | *, 128 | date_trunc('month', first_ordered_at) as cohort_month, 129 | lifetime_spend_pretax / count_lifetime_orders as average_order_value_usd0 130 | 131 | from analytics.customers 132 | ``` 133 | 134 | ```cohorts_aov 135 | select 136 | cohort_month, 137 | avg(average_order_value_usd0) as cohort_aov_usd 138 | 139 | from ${customers_with_cohort} 140 | 141 | group by 1 142 | order by cohort_month 143 | ``` 144 | 145 | 153 | 154 | ### Average Order Values 155 | 156 | 163 | -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/stores/[city].md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop {$page.params.city} 🥪 2 | 3 | ```opening 4 | select 5 | location_name, 6 | min(month) as opened_month_mmmyyyy 7 | from ${monthly_stats} 8 | group by location_name 9 | order by opened_month_mmmyyyy desc 10 | ``` 11 | 12 | {#if opening[0].location_name === $page.params.city} 13 | 14 | {$page.params.city} is the most recent store opening for Jaffle Shop, opened in d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 15 | 16 | {:else} 17 | 18 | The {$page.params.city} location was opened in d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 19 | 20 | {/if} 21 | 22 | ```monthly_stats 23 | with 24 | monthly_stats as ( 25 | select 26 | date_trunc('month', ordered_at) as month, 27 | location_name, 28 | sum(order_total) as revenue_usd1k, 29 | count(*)::float as orders, 30 | count(distinct customer_id)::float as customers 31 | 32 | from analytics.orders 33 | group by month, location_name 34 | order by month desc 35 | ) 36 | 37 | select 38 | *, 39 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 40 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 41 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 42 | monthname(month) as month_name 43 | from monthly_stats 44 | ``` 45 | 46 | data.location_name === $page.params.city)} 48 | value=revenue_usd1k 49 | comparison=revenue_growth_pct1 50 | title="Monthly Revenue" 51 | comparisonTitle="vs. prev. month" 52 | /> 53 | 54 | data.location_name === $page.params.city)} 56 | value=orders 57 | comparison=order_growth_pct1 58 | title="Monthly Orders" 59 | comparisonTitle="vs. prev. month" 60 | /> 61 | 62 | Jaffle Shop {$page.params.city} served d.location_name === $page.params.city)} column=customers/> happy customers in d.location_name === $page.params.city)} column=month_name/>. This was a change of d.location_name === $page.params.city)} column=customer_growth_pct1/> from d.location_name === $page.params.city)} column=month_name row=1/>. 
63 | 64 | ```orders_per_week 65 | select 66 | location_name as city, 67 | date_trunc('week', ordered_at) as week, 68 | count(*) as orders 69 | 70 | from analytics.orders 71 | 72 | group by 1, 2 73 | order by 1, 2 74 | ``` 75 | 76 | ## Orders Per Week in {$page.params.city} 77 | 78 | <LineChart 79 | data={orders_per_week.filter(data => data.city === $page.params.city)} 80 | x=week 81 | y=orders 82 | yAxisTitle="orders per week in {$page.params.city}" 83 | /> -------------------------------------------------------------------------------- /test_projects/customer_success/reports/pages/stores/index.md: -------------------------------------------------------------------------------- 1 | # Stores 2 | 3 | ```revenue_per_city 4 | select 5 | location_name as city, 6 | concat('/stores/', location_name) as store_link, 7 | count(distinct customer_id) as customers, 8 | count(*) as orders, 9 | sum(order_total) as revenue_usd 10 | 11 | from analytics.orders 12 | 13 | group by 1, 2 14 | ``` 15 | 16 | Click a row to see the report for that store: 17 | <DataTable data={revenue_per_city} link=store_link/> -------------------------------------------------------------------------------- /test_projects/customer_success/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-postgres==1.6.0-b8 2 | jafgen~=0.3.1 3 | pre-commit~=3.0.4 4 | sqlfluff-templater-dbt~=2.0.0a5 5 | sqlfluff~=2.0.0a5 -------------------------------------------------------------------------------- /test_projects/customer_success/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/snapshots/.gitkeep -------------------------------------------------------------------------------- /test_projects/customer_success/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/customer_success/tests/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | *.duckdb 6 | *.duckdb.wal 7 | reports/sources/*.csv 8 | .meltano 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /test_projects/revenue/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: requirements-txt-fixer 9 | - repo: https://github.com/charliermarsh/ruff-pre-commit 10 | rev: v0.0.245 11 | hooks: 12 | - id: ruff 13 | args: [--fix, --exit-non-zero-on-fix] 14 | - repo: https://github.com/pre-commit/mirrors-eslint 15 | rev: v8.34.0 16 | hooks: 17 | - id: eslint 18 | - repo: https://github.com/sqlfluff/sqlfluff 19 | rev: "2.0.0a4" 20 | hooks: 21 | - id: sqlfluff-lint 22 | additional_dependencies: 23 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 24 | - id: sqlfluff-fix 25 | additional_dependencies: 26 | ["dbt-duckdb==1.4.0", "sqlfluff-templater-dbt==2.0.0a4"] 27 | - repo: https://github.com/psf/black 28 | rev: "23.1.0" 29 | hooks: 30 | - id: black 31 | # - repo: https://github.com/pre-commit/mirrors-prettier 32 | # rev: "" # Use the
sha or tag you want to point at 33 | # hooks: 34 | # - id: prettier 35 | -------------------------------------------------------------------------------- /test_projects/revenue/.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | dialect = duckdb 3 | templater = dbt 4 | runaway_limit = 10 5 | max_line_length = 80 6 | indent_unit = space 7 | 8 | [sqlfluff:indentation] 9 | tab_space_size = 4 10 | 11 | [sqlfluff:layout:type:comma] 12 | spacing_before = touch 13 | line_position = trailing 14 | 15 | [sqlfluff:rules:capitalisation.keywords] 16 | capitalisation_policy = lower 17 | 18 | [sqlfluff:rules:aliasing.table] 19 | aliasing = explicit 20 | 21 | [sqlfluff:rules:aliasing.column] 22 | aliasing = explicit 23 | 24 | [sqlfluff:rules:aliasing.expression] 25 | allow_scalar = False 26 | 27 | [sqlfluff:rules:capitalisation.identifiers] 28 | extended_capitalisation_policy = lower 29 | 30 | [sqlfluff:rules:capitalisation.functions] 31 | capitalisation_policy = lower 32 | 33 | [sqlfluff:rules:capitalisation.literals] 34 | capitalisation_policy = lower 35 | 36 | [sqlfluff:rules:ambiguous.column_references] # Number in group by 37 | group_by_and_order_by_style = implicit 38 | -------------------------------------------------------------------------------- /test_projects/revenue/.sqlfluffignore: -------------------------------------------------------------------------------- 1 | reports 2 | target 3 | dbt_packages 4 | macros 5 | -------------------------------------------------------------------------------- /test_projects/revenue/README.md: -------------------------------------------------------------------------------- 1 | # 🥪 The Jaffle Shop 🦘 2 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/dbt-labs/jaffle-shop-template?quickstart=1) 3 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/dbt-labs/jaffle-shop-template) 4 | 5 | This is a template for creating a fully functional dbt project for teaching, learning, writing, demoing, or any other scenarios where you need a basic project with a synthesized jaffle shop business. We recommend beginners use the following steps to open this project right here on GitHub in a Codespace. If you're a little more experienced with devcontainers and want to go faster 🏎️, you can use the Gitpod link above for a quicker startup and deeper feature set. 6 | 7 | ## How to use 8 | 9 | ### 1. Click the big green 'Use this template' button and 'Create a new repository'. 10 | 11 | ![Click use template](.github/static/use-template.gif) 12 | 13 | This will create a new repository exactly like this one, and navigate you there. Make sure to execute the next instructions in that repo. 14 | 15 | ### 2. Click 'Code', then 'Codespaces', then 'Create codespace on main'. 16 | 17 | ![Create codespace on main](.github/static/open-codespace.gif) 18 | 19 | This will create a new `codespace`, a sandboxed devcontainer with everything you need for a dbt project. Once the codespace is finished setting up, you'll be ready to run a `dbt build`. 20 | 21 | ### 3. Make sure to wait until the codespace is finished setting up. 22 | 23 | ![Codespaces setup screen at postCreateCommand](.github/static/codespaces-setup-screen.png) 24 | 25 | After the container is built and connected to, VSCode will run a few clean-up commands and then a `postCreateCommand`, a set of commands run after the container is set up.
This is where we install our dependencies, such as dbt, the duckdb adapter, and other necessities, as well as run `dbt deps` to install the dbt packages we want to use. That screen will look something like the above. When it's completed it will close and leave you in a fresh terminal prompt. From there you're ready to do some analytics engineering! 26 | 27 | ## Additional included tools 28 | 29 | This template includes two additional tools for the other parts of the stack to create a more realistic experience: 30 | 31 | - BI reporting built with [Evidence](https://evidence.dev) - an open source, code-based BI tool to write reports with markdown and SQL. 32 | - EL with [Meltano](https://meltano.com/) - an open source tool that provides a CLI & version control for ELT pipelines. 33 | 34 | ### Evidence 35 | 36 | With Evidence you can: 37 | 38 | - Version control your BI layer 39 | - Build reports in the same repo as your dbt project 40 | - Deploy your reports to a static site 41 | 42 | #### Running Evidence 43 | 44 | To run Evidence, use: 45 | 46 | ```shell 47 | cd reports 48 | npm run dev 49 | ``` 50 | 51 | See the [Evidence CLI docs](https://docs.evidence.dev/cli) for more details. 52 | 53 | You can make changes to the markdown pages in the `reports/pages` folder and see the reports update in the browser preview. 54 | 55 | #### Learning More about Evidence 56 | 57 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence) 58 | - [Project Home Page](https://www.evidence.dev) 59 | - [Github](https://github.com/evidence-dev/evidence) 60 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases) 61 | 62 | ### Meltano 63 | 64 | This project is preconfigured with Meltano, which can be used to extract and load raw data into DuckDB. 65 | 66 | #### Run EL (Extract and Load) using Meltano 67 | 68 | ```console 69 | meltano run tap-jaffle-shop target-duckdb 70 | ``` 71 | 72 | Optionally, you can modify extract parameters using environment variables. For instance, this modified version will extract five years of data instead of the default one year. 73 | 74 | ```console 75 | export TAP_JAFFLE_SHOP_YEARS=5 76 | meltano run tap-jaffle-shop target-duckdb 77 | ``` 78 | 79 | You can also modify any tap or target config with the interactive `config` command: 80 | 81 | ```console 82 | meltano config tap-jaffle-shop set --interactive 83 | meltano config target-duckdb set --interactive 84 | ``` 85 | 86 | ## Local development 87 | 88 | This project is optimized for running in a container. If you'd like to use it locally outside of a container, you'll need to follow the instructions below. 89 | 90 | 1. Create a python virtual environment and install the dependencies. 91 | 92 | ```console 93 | python3 -m venv .venv 94 | source .venv/bin/activate 95 | pip install -r requirements.txt 96 | ``` 97 | 98 | 2. Install meltano with [pipx](https://pypa.github.io/pipx/installation/), and install meltano's dependencies. 99 | 100 | ```console 101 | pipx install meltano 102 | meltano install 103 | ``` 104 | 105 | 3. Run the EL pipeline. 106 | 107 | ```console 108 | meltano run el 109 | ``` 110 | 111 | 4. Install dbt dependencies and build the dbt project. 112 | 113 | ```console 114 | dbt deps 115 | dbt build 116 | ``` 117 | 118 | 5. Install Evidence dependencies and run the Evidence server. 119 | 120 | ```console 121 | cd reports 122 | npm install 123 | npm run dev 124 | ``` 125 | 126 | ## Contributing 127 | 128 | We welcome issues and PRs requesting or adding new features.
The package that generates the synthetic data, [`jafgen`](https://pypi.org/project/jafgen/), is also under active development, and will add more types of source data to model as we go along. If you have tests, descriptions, new models, metrics, materialization types, or techniques that you use this repo to demonstrate, and that you feel would make for a more expansive baseline experience, we encourage you to contribute them back so that this project becomes an even better collective tool for exploring and learning dbt over time. 129 | -------------------------------------------------------------------------------- /test_projects/revenue/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | tasks: 4 | deps: 5 | cmds: 6 | - python -m pip install --progress-bar off -r requirements.txt 7 | - dbt deps 8 | -------------------------------------------------------------------------------- /test_projects/revenue/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/analyses/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/dbt_loom.config.yml: -------------------------------------------------------------------------------- 1 | manifests: 2 | - name: potato 3 | type: file 4 | optional: true 5 | config: 6 | path: ../customer_success/target/manifest.json 7 | excluded_packages: 8 | - dbt_project_evaluator 9 | -------------------------------------------------------------------------------- /test_projects/revenue/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "revenue" 5 | version: "1.0.0" 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: "revenue" 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | vars: 27 | truncate_timespan_to: "{{ current_timestamp() }}" 28 | 29 | # Configuring models 30 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 31 | 32 | restrict-access: false 33 | 34 | # In this example config, we tell dbt to build staging models as views and marts 35 | # models as tables. These settings can be overridden in the individual model files 36 | # using the `{{ config(...) }}` macro.
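# For instance, a single model file could override the project-level default
# with something like the following (a hypothetical sketch, not a model that
# exists in this project):
#
#   {{ config(materialized='view') }}
#
#   select * from {{ ref('stg_orders') }}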
37 | 38 | models: 39 | revenue: 40 | staging: 41 | +materialized: view 42 | marts: 43 | +materialized: table 44 | dbt_project_evaluator: 45 | +access: private 46 | marts: 47 | dag: 48 | fct_source_fanout: 49 | +enabled: true 50 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_products.csv: -------------------------------------------------------------------------------- 1 | sku,name,type,price,description 2 | JAF-001,nutellaphone who dis?,jaffle,1100,nutella and banana jaffle 3 | JAF-002,doctor stew,jaffle,1100,house-made beef stew jaffle 4 | JAF-003,the krautback,jaffle,1200,lamb and pork bratwurst with house-pickled cabbage sauerkraut and mustard 5 | JAF-004,flame impala,jaffle,1400,"pulled pork and pineapple al pastor marinated in ghost pepper sauce, kevin parker's favorite! " 6 | JAF-005,mel-bun,jaffle,1200,"melon and minced beef bao, in a jaffle, savory and sweet" 7 | BEV-001,tangaroo,beverage,600,mango and tangerine smoothie 8 | BEV-002,chai and mighty,beverage,500,oatmilk chai latte with protein boost 9 | BEV-003,vanilla ice,beverage,600,iced coffee with house-made french vanilla syrup 10 | BEV-004,for richer or pourover ,beverage,700,daily selection of single estate beans for a delicious hot pourover 11 | BEV-005,adele-ade,beverage,400,"a kiwi and lime agua fresca, hello from the other side of thirst" 12 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_stores.csv: -------------------------------------------------------------------------------- 1 | id,name,opened_at,tax_rate 2 | 7f790ed7-0fc4-4de2-a1b0-cce72e657fc4,Philadelphia,2016-09-01T00:00:00,0.06 3 | 08d44615-06d3-4086-a5d7-21395a1d975e,Brooklyn,2017-03-12T00:00:00,0.04 4 | f6f2bd97-becb-4e1c-a611-20c7cf579841,Chicago,2018-04-29T00:00:00,0.0625 5 | 48b0172c-4490-4f05-b290-e69f418d0575,San Francisco,2018-05-09T00:00:00,0.075 6 | ed2af26d-35a1-4a31-ac65-7aedcaa7b7a7,New Orleans,2019-03-10T00:00:00,0.04 7 | -------------------------------------------------------------------------------- /test_projects/revenue/jaffle-data/raw_supplies.csv: -------------------------------------------------------------------------------- 1 | id,name,cost,perishable,sku 2 | SUP-001,compostable cutlery - knife,7,False,JAF-001 3 | SUP-002,cutlery - fork,7,False,JAF-001 4 | SUP-003,serving boat,11,False,JAF-001 5 | SUP-004,napkin,4,False,JAF-001 6 | SUP-009,bread,33,True,JAF-001 7 | SUP-011,nutella,46,True,JAF-001 8 | SUP-012,banana,13,True,JAF-001 9 | SUP-001,compostable cutlery - knife,7,False,JAF-002 10 | SUP-002,cutlery - fork,7,False,JAF-002 11 | SUP-003,serving boat,11,False,JAF-002 12 | SUP-004,napkin,4,False,JAF-002 13 | SUP-009,bread,33,True,JAF-002 14 | SUP-010,cheese,20,True,JAF-002 15 | SUP-013,beef stew,169,True,JAF-002 16 | SUP-001,compostable cutlery - knife,7,False,JAF-003 17 | SUP-002,cutlery - fork,7,False,JAF-003 18 | SUP-003,serving boat,11,False,JAF-003 19 | SUP-004,napkin,4,False,JAF-003 20 | SUP-009,bread,33,True,JAF-003 21 | SUP-010,cheese,20,True,JAF-003 22 | SUP-014,lamb and pork bratwurst,234,True,JAF-003 23 | SUP-015,house-pickled cabbage sauerkraut,43,True,JAF-003 24 | SUP-016,mustard,7,True,JAF-003 25 | SUP-001,compostable cutlery - knife,7,False,JAF-004 26 | SUP-002,cutlery - fork,7,False,JAF-004 27 | SUP-003,serving boat,11,False,JAF-004 28 | SUP-004,napkin,4,False,JAF-004 29 | SUP-009,bread,33,True,JAF-004 30 | SUP-010,cheese,20,True,JAF-004 31 | SUP-017,pulled 
pork,215,True,JAF-004 32 | SUP-018,pineapple,26,True,JAF-004 33 | SUP-021,ghost pepper sauce,20,True,JAF-004 34 | SUP-001,compostable cutlery - knife,7,False,JAF-005 35 | SUP-002,cutlery - fork,7,False,JAF-005 36 | SUP-003,serving boat,11,False,JAF-005 37 | SUP-004,napkin,4,False,JAF-005 38 | SUP-009,bread,33,True,JAF-005 39 | SUP-010,cheese,20,True,JAF-005 40 | SUP-019,melon,33,True,JAF-005 41 | SUP-020,minced beef,124,True,JAF-005 42 | SUP-005,16oz compostable clear cup,13,False,BEV-001 43 | SUP-006,16oz compostable clear lid,4,False,BEV-001 44 | SUP-007,biodegradable straw,13,False,BEV-001 45 | SUP-022,mango,32,True,BEV-001 46 | SUP-023,tangerine,20,True,BEV-001 47 | SUP-005,16oz compostable clear cup,13,False,BEV-002 48 | SUP-006,16oz compostable clear lid,4,False,BEV-002 49 | SUP-007,biodegradable straw,13,False,BEV-002 50 | SUP-008,chai mix,98,True,BEV-002 51 | SUP-024,oatmilk,11,True,BEV-002 52 | SUP-025,whey protein,36,True,BEV-002 53 | SUP-005,16oz compostable clear cup,13,False,BEV-003 54 | SUP-006,16oz compostable clear lid,4,False,BEV-003 55 | SUP-007,biodegradable straw,13,False,BEV-003 56 | SUP-026,coffee,52,True,BEV-003 57 | SUP-027,french vanilla syrup,72,True,BEV-003 58 | SUP-005,16oz compostable clear cup,13,False,BEV-004 59 | SUP-006,16oz compostable clear lid,4,False,BEV-004 60 | SUP-007,biodegradable straw,13,False,BEV-004 61 | SUP-026,coffee,52,True,BEV-004 62 | SUP-005,16oz compostable clear cup,13,False,BEV-005 63 | SUP-006,16oz compostable clear lid,4,False,BEV-005 64 | SUP-007,biodegradable straw,13,False,BEV-005 65 | SUP-028,kiwi,20,True,BEV-005 66 | SUP-029,lime,13,True,BEV-005 67 | -------------------------------------------------------------------------------- /test_projects/revenue/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/macros/.gitkeep -------------------------------------------------------------------------------- /test_projects/revenue/macros/cents_to_dollars.sql: -------------------------------------------------------------------------------- 1 | {# A basic example for a project-wide macro to cast a column uniformly #} 2 | 3 | {% macro cents_to_dollars(column_name, precision=2) -%} 4 | ({{ column_name }} / 100)::numeric(16, {{ precision }}) 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /test_projects/revenue/meltano.yml: -------------------------------------------------------------------------------- 1 | # Meltano Configuration File 2 | # 3 | # Sample usage: 4 | # > meltano run tap-jaffle-shop target-duckdb 5 | # 6 | # Or equivalently: 7 | # > meltano run el # Run the job named 'el' to extract and load data 8 | 9 | version: 1 10 | project_id: Jaffle Shop Template Project 11 | 12 | env: 13 | JAFFLE_DB_NAME: jaffle_shop 14 | JAFFLE_RAW_SCHEMA: jaffle_raw 15 | 16 | default_environment: dev 17 | environments: 18 | - name: dev 19 | 20 | plugins: 21 | extractors: 22 | - name: tap-jaffle-shop 23 | namespace: tap_jaffle_shop 24 | variant: meltanolabs 25 | pip_url: git+https://github.com/MeltanoLabs/tap-jaffle-shop.git@v0.3.0 26 | capabilities: 27 | - catalog 28 | - discover 29 | config: 30 | years: 2 31 | stream_name_prefix: ${JAFFLE_RAW_SCHEMA}-raw_ 32 | loaders: 33 | - name: target-postgres 34 | variant: datamill-co 35 | pip_url: git+https://github.com/datamill-co/target-postgres.git@v0.1.0 36 | config: 37 | host: 127.0.0.1 38 | 
user: postgres 39 | password: ${SNOWFLAKE_PASSWORD} 40 | default_target_schema: $JAFFLE_RAW_SCHEMA 41 | 42 | jobs: 43 | # Sample usage: `meltano run el` 44 | # Equivalent to: `meltano run tap-jaffle-shop target-postgres` 45 | - name: el # Extract and load the raw data 46 | tasks: 47 | - tap-jaffle-shop target-postgres 48 | -------------------------------------------------------------------------------- /test_projects/revenue/models/groups.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: sales 3 | owner: 4 | email: sales@example.com 5 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: orders 5 | description: > 6 | Order overview data mart, offering key details for each order including if it's a customer's 7 | first order and a food vs. drink item breakdown. One row per order. 8 | access: public 9 | tests: 10 | - dbt_utils.expression_is_true: 11 | expression: "count_food_items + count_drink_items = count_items" 12 | - dbt_utils.expression_is_true: 13 | expression: "subtotal_food_items + subtotal_drink_items = subtotal" 14 | 15 | columns: 16 | - name: order_id 17 | description: The unique key of the orders mart. 18 | tests: 19 | - not_null 20 | - unique 21 | - name: customer_id 22 | description: The foreign key relating to the customer who placed the order. 23 | - name: location_id 24 | description: The foreign key relating to the location the order was placed at. 25 | - name: order_total 26 | description: The total amount of the order in USD including tax. 27 | - name: ordered_at 28 | description: The timestamp the order was placed at. 29 | - name: count_food_items 30 | description: The number of individual food items ordered. 31 | - name: count_drink_items 32 | description: The number of individual drink items ordered. 33 | - name: count_items 34 | description: The total number of both food and drink items ordered. 35 | - name: subtotal_food_items 36 | description: The sum of all the food item prices without tax. 37 | - name: subtotal_drink_items 38 | description: The sum of all the drink item prices without tax. 39 | - name: subtotal 40 | description: The sum total of both food and drink item prices without tax. 41 | - name: order_cost 42 | description: The sum of supply expenses to fulfill the order. 43 | - name: location_name 44 | description: > 45 | The full location name of where this order was placed. Denormalized from `stg_locations`. 46 | - name: is_first_order 47 | description: > 48 | A boolean indicating if this order is from a new customer placing their first order. 49 | - name: is_food_order 50 | description: A boolean indicating if this order included any food items. 51 | - name: is_drink_order 52 | description: A boolean indicating if this order included any drink items. 53 | 54 | latest_version: 2 55 | versions: 56 | - v: 1 57 | deprecation_date: "2024-01-01" 58 | 59 | - v: 2 60 | columns: 61 | - include: all 62 | exclude: [location_id] 63 | 64 | - name: accounts 65 | description: > 66 | All accounts with whom we have done business. This is a very sensitive asset. 67 | access: private 68 | group: sales 69 | 70 | columns: 71 | - name: name 72 | description: Name of the account.
73 | tests: 74 | - not_null 75 | - unique 76 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/accounts.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | final as ( 4 | select name from {{ ref('stg_accounts') }} 5 | ) 6 | 7 | 8 | select * from final 9 | -------------------------------------------------------------------------------- /test_projects/revenue/models/marts/orders_v1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'order_id' 5 | ) 6 | }} 7 | 8 | with 9 | 10 | {# 11 | DuckDB will see {{ this }} evaluate to `orders` and a CTE called `orders` as being the same 12 | so when using DuckDB we append `_set` to any CTEs with the same name as {{ this }} to indicate 13 | we're not executing a recursive statement 14 | #} 15 | 16 | orders_set as ( 17 | 18 | select * from {{ ref('stg_orders') }} 19 | 20 | where 21 | true 22 | 23 | {% if is_incremental() %} 24 | 25 | and ordered_at >= ( 26 | select max(ordered_at) as most_recent_record from {{ this }} 27 | ) 28 | 29 | {% endif %} 30 | 31 | ), 32 | 33 | order_items as ( 34 | 35 | select * from {{ ref('stg_order_items') }} 36 | 37 | ), 38 | 39 | products as ( 40 | 41 | select * from {{ ref('stg_products') }} 42 | 43 | ), 44 | 45 | locations as ( 46 | 47 | select * from {{ ref('stg_locations') }} 48 | 49 | ), 50 | 51 | supplies as ( 52 | 53 | select * from {{ ref('stg_supplies') }} 54 | 55 | ), 56 | 57 | order_items_summary as ( 58 | 59 | select 60 | 61 | order_items.order_id, 62 | 63 | sum(products.is_food_item) as count_food_items, 64 | sum(products.is_drink_item) as count_drink_items, 65 | count(*) as count_items, 66 | sum( 67 | case 68 | when products.is_food_item = 1 then products.product_price 69 | else 0 70 | end 71 | ) as subtotal_food_items, 72 | sum( 73 | case 74 | when products.is_drink_item = 1 then products.product_price 75 | else 0 76 | end 77 | ) as subtotal_drink_items, 78 | sum(products.product_price) as subtotal 79 | 80 | from order_items 81 | 82 | left join products on order_items.product_id = products.product_id 83 | 84 | group by 1 85 | 86 | ), 87 | 88 | order_supplies_summary as ( 89 | 90 | select 91 | 92 | order_items.order_id, 93 | 94 | sum(supplies.supply_cost) as order_cost 95 | 96 | from order_items 97 | 98 | left join supplies on order_items.product_id = supplies.product_id 99 | 100 | group by 1 101 | 102 | ), 103 | 104 | joined as ( 105 | 106 | select 107 | 108 | orders_set.*, 109 | 110 | order_items_summary.count_food_items, 111 | order_items_summary.count_drink_items, 112 | order_items_summary.count_items, 113 | 114 | order_items_summary.subtotal_drink_items, 115 | order_items_summary.subtotal_food_items, 116 | order_items_summary.subtotal, 117 | 118 | order_supplies_summary.order_cost, 119 | locations.location_name 120 | 121 | from orders_set 122 | 123 | left join order_items_summary 124 | on orders_set.order_id = order_items_summary.order_id 125 | left join order_supplies_summary 126 | on orders_set.order_id = order_supplies_summary.order_id 127 | left join locations 128 | on orders_set.location_id = locations.location_id 129 | 130 | ), 131 | 132 | final as ( 133 | 134 | select 135 | 136 | *, 137 | count_food_items > 0 as is_food_order, 138 | count_drink_items > 0 as is_drink_order 139 | 140 | from joined 141 | 142 | ) 143 | 144 | select * from final 145 |
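-- Note: on an incremental run, the is_incremental() block above renders a
-- filter against the already-built table. Roughly (a sketch; the actual
-- database, schema, and relation name depend on the configured target):
--
--   select * from stg_orders
--   where true
--     and ordered_at >= (
--         select max(ordered_at) as most_recent_record from analytics.orders_v1
--     )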
-------------------------------------------------------------------------------- /test_projects/revenue/models/marts/orders_v2.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'order_id' 5 | ) 6 | }} 7 | 8 | with 9 | 10 | {# 11 | DuckDB will see {{ this }} evaluate to `orders` and a CTE called `orders` as being the same 12 | so when using DuckDB we append `_set` to any CTEs with the same name as {{ this }} to indicate 13 | we're not executing a recursive statement 14 | #} 15 | 16 | orders_set as ( 17 | 18 | select * from {{ ref('stg_orders') }} 19 | 20 | where 21 | true 22 | 23 | {% if is_incremental() %} 24 | 25 | and ordered_at >= ( 26 | select max(ordered_at) as most_recent_record from {{ this }} 27 | ) 28 | 29 | {% endif %} 30 | 31 | ), 32 | 33 | order_items as ( 34 | 35 | select * from {{ ref('stg_order_items') }} 36 | 37 | ), 38 | 39 | products as ( 40 | 41 | select * from {{ ref('stg_products') }} 42 | 43 | ), 44 | 45 | locations as ( 46 | 47 | select * from {{ ref('stg_locations') }} 48 | 49 | ), 50 | 51 | supplies as ( 52 | 53 | select * from {{ ref('stg_supplies') }} 54 | 55 | ), 56 | 57 | accounts as ( 58 | select * from {{ ref('stg_accounts') }} 59 | ), 60 | 61 | order_items_summary as ( 62 | 63 | select 64 | 65 | order_items.order_id, 66 | 67 | sum(products.is_food_item) as count_food_items, 68 | sum(products.is_drink_item) as count_drink_items, 69 | count(*) as count_items, 70 | sum( 71 | case 72 | when products.is_food_item = 1 then products.product_price 73 | else 0 74 | end 75 | ) as subtotal_food_items, 76 | sum( 77 | case 78 | when products.is_drink_item = 1 then products.product_price 79 | else 0 80 | end 81 | ) as subtotal_drink_items, 82 | sum(products.product_price) as subtotal 83 | 84 | from order_items 85 | 86 | left join products on order_items.product_id = products.product_id 87 | 88 | group by 1 89 | 90 | ), 91 | 92 | order_supplies_summary as ( 93 | 94 | select 95 | 96 | order_items.order_id, 97 | 98 | sum(supplies.supply_cost) as order_cost 99 | 100 | from order_items 101 | 102 | left join supplies on order_items.product_id = supplies.product_id 103 | 104 | group by 1 105 | 106 | ), 107 | 108 | joined as ( 109 | 110 | select 111 | 112 | orders_set.* exclude location_id, 113 | 114 | order_items_summary.count_food_items, 115 | order_items_summary.count_drink_items, 116 | order_items_summary.count_items, 117 | 118 | order_items_summary.subtotal_drink_items, 119 | order_items_summary.subtotal_food_items, 120 | order_items_summary.subtotal, 121 | 122 | order_supplies_summary.order_cost, 123 | locations.location_name 124 | 125 | from orders_set 126 | 127 | left join order_items_summary 128 | on orders_set.order_id = order_items_summary.order_id 129 | left join order_supplies_summary 130 | on orders_set.order_id = order_supplies_summary.order_id 131 | left join locations 132 | on orders_set.location_id = locations.location_id 133 | 134 | ), 135 | 136 | final as ( 137 | 138 | select 139 | 140 | *, 141 | count_food_items > 0 as is_food_order, 142 | count_drink_items > 0 as is_drink_order 143 | 144 | from joined 145 | 146 | ) 147 | 148 | select * from final 149 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_locations 5 | description: List of open
locations with basic cleaning and transformation applied, one row per location. 6 | columns: 7 | - name: location_id 8 | description: The unique key for each location. 9 | tests: 10 | - not_null 11 | - unique 12 | 13 | - name: stg_order_items 14 | description: Individual food and drink items that make up our orders, one row per item. 15 | columns: 16 | - name: order_item_id 17 | description: The unique key for each order item. 18 | tests: 19 | - not_null 20 | - unique 21 | 22 | - name: stg_orders 23 | description: Order data with basic cleaning and transformation applied, one row per order. 24 | columns: 25 | - name: order_id 26 | description: The unique key for each order. 27 | tests: 28 | - not_null 29 | - unique 30 | 31 | - name: stg_products 32 | description: Product (food and drink items that can be ordered) data with basic cleaning and transformation applied, one row per product. 33 | columns: 34 | - name: product_id 35 | description: The unique key for each product. 36 | tests: 37 | - not_null 38 | - unique 39 | 40 | - name: stg_supplies 41 | description: > 42 | List of our supply expenses data with basic cleaning and transformation applied. 43 | 44 | One row per supply cost, not per supply. As supply costs fluctuate they receive a new row with a new UUID. Thus there can be multiple rows per supply_id. 45 | columns: 46 | - name: supply_uuid 47 | description: The unique key of our supplies per cost. 48 | tests: 49 | - not_null 50 | - unique 51 | 52 | - name: stg_accounts 53 | description: > 54 | List of all accounts. 55 | columns: 56 | - name: name 57 | description: The unique key of our accounts. 58 | tests: 59 | - not_null 60 | - unique 61 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: ecom 5 | schema: raw 6 | description: E-commerce data 7 | tables: 8 | - name: raw_orders 9 | meta: 10 | external_location: "read_csv('jaffle-data/raw_orders.csv',AUTO_DETECT=TRUE)" 11 | description: One record per order (consisting of one or more order items) 12 | - name: raw_items 13 | meta: 14 | external_location: "read_csv('jaffle-data/raw_items.csv', names=['id', 'order_id', 'sku'], AUTO_DETECT=TRUE)" 15 | description: Items included in an order 16 | - name: raw_stores 17 | meta: 18 | external_location: "read_csv('jaffle-data/raw_stores.csv',AUTO_DETECT=TRUE)" 19 | description: One record per physical store location 20 | - name: raw_products 21 | meta: 22 | external_location: "read_csv('jaffle-data/raw_products.csv',AUTO_DETECT=TRUE)" 23 | description: One record per SKU for items sold in stores 24 | - name: raw_supplies 25 | meta: 26 | external_location: "read_csv('jaffle-data/raw_supplies.csv',AUTO_DETECT=TRUE)" 27 | description: One record per supply per SKU of items sold in stores 28 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_accounts.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('seed_accounts') }} -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_locations.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_stores') }} 7 | 8 | {# data runs to 2026, 
truncate timespan to desired range, 9 | current time as default #} 10 | where opened_at::timestamptz <= {{ var('truncate_timespan_to') }} 11 | 12 | ), 13 | 14 | renamed as ( 15 | 16 | select 17 | 18 | ---------- ids 19 | id as location_id, 20 | 21 | ---------- properties 22 | name as location_name, 23 | tax_rate, 24 | 25 | ---------- timestamp 26 | opened_at 27 | 28 | from source 29 | 30 | ) 31 | 32 | select * from renamed 33 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_order_items.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_items') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | id as order_item_id, 16 | order_id, 17 | 18 | ---------- properties 19 | sku as product_id 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table' 4 | ) 5 | }} 6 | 7 | with 8 | 9 | source as ( 10 | 11 | select * from {{ source('ecom', 'raw_orders') }} 12 | 13 | -- data runs to 2026, truncate timespan to desired range, 14 | -- current time as default 15 | where ordered_at::timestamptz <= {{ var('truncate_timespan_to') }} 16 | 17 | ), 18 | 19 | renamed as ( 20 | 21 | select 22 | 23 | ---------- ids 24 | id as order_id, 25 | store_id as location_id, 26 | customer as customer_id, 27 | 28 | ---------- properties 29 | (order_total / 100.0)::float as order_total, 30 | (tax_paid / 100.0)::float as tax_paid, 31 | 32 | ---------- timestamps 33 | ordered_at 34 | 35 | from source 36 | 37 | ) 38 | 39 | select * from renamed 40 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_products.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_products') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | sku as product_id, 16 | 17 | ---------- properties 18 | name as product_name, 19 | type as product_type, 20 | description as product_description, 21 | (price / 100.0)::float as product_price, 22 | 23 | 24 | ---------- derived 25 | case 26 | when type = 'jaffle' then 1 27 | else 0 28 | end as is_food_item, 29 | 30 | case 31 | when type = 'beverage' then 1 32 | else 0 33 | end as is_drink_item 34 | 35 | from source 36 | 37 | ) 38 | 39 | select * from renamed 40 | -------------------------------------------------------------------------------- /test_projects/revenue/models/staging/stg_supplies.sql: -------------------------------------------------------------------------------- 1 | 2 | with 3 | 4 | source as ( 5 | 6 | select * from {{ source('ecom', 'raw_supplies') }} 7 | 8 | ), 9 | 10 | renamed as ( 11 | 12 | select 13 | 14 | ---------- ids 15 | {{ dbt_utils.generate_surrogate_key(['id', 'sku']) }} as supply_uuid, 16 | id as supply_id, 17 | sku as product_id, 18 | 19 | ---------- properties 20 | name as supply_name, 21 | (cost / 100.0)::float as supply_cost, 22 | perishable as is_perishable_supply 23 | 24 | from source 25 | 26 | ) 27 | 28 | select * from renamed 29 | 
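-- For reference, {{ dbt_utils.generate_surrogate_key(['id', 'sku']) }} above
-- compiles to an md5 hash over a null-safe, delimited concatenation of the
-- columns. Approximately (a sketch of the rendered SQL, not project code):
--
--   md5(
--       coalesce(cast(id as varchar), '_dbt_utils_surrogate_key_null_')
--       || '-' || coalesce(cast(sku as varchar), '_dbt_utils_surrogate_key_null_')
--   ) as supply_uuid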
-------------------------------------------------------------------------------- /test_projects/revenue/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | - package: dbt-labs/dbt_project_evaluator 5 | version: 0.14.3 6 | sha1_hash: 52459ce227fef835e4466cbb12d624b3e1971fae 7 | -------------------------------------------------------------------------------- /test_projects/revenue/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.0.0 4 | - package: dbt-labs/dbt_project_evaluator 5 | version: 0.14.3 6 | -------------------------------------------------------------------------------- /test_projects/revenue/profiles.yml: -------------------------------------------------------------------------------- 1 | revenue: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ../database.db 6 | threads: 4 7 | target: dev 8 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/.evidence/customization/custom-formatting.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "customFormats": [] 4 | } -------------------------------------------------------------------------------- /test_projects/revenue/reports/.gitignore: -------------------------------------------------------------------------------- 1 | .evidence/template 2 | .svelte-kit 3 | build 4 | node_modules 5 | .DS_Store 6 | *.*duckdb 7 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/README.md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop BI Reports 2 | 3 | This project uses [Evidence.dev](https://Evidence.dev) for BI reporting. 4 | 5 | ## Getting Started 6 | 7 | Run the BI server from your local workstation: 8 | 9 | ```shell 10 | cd reports 11 | npm run dev 12 | ``` 13 | 14 | This will launch the Evidence webserver in developer mode. 15 | 16 | ## Testing for breakages 17 | 18 | The following command can be used to confirm that reports and queries are still valid: 19 | 20 | ```console 21 | npm run build:strict 22 | ``` 23 | 24 | ## Updating to the latest version of Evidence 25 | 26 | 1. Check your version against the version number of the [latest release](https://github.com/evidence-dev/evidence/releases). 27 | 2. Run `npm install @evidence-dev/evidence@latest` to bump the version in `package.json` and automatically update dependencies in `package-lock.json`.
28 | 29 | ## Learning More 30 | 31 | - [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence) 32 | - [Project Home Page](https://www.evidence.dev) 33 | - [Github](https://github.com/evidence-dev/evidence) 34 | - [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases) 35 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jaffle-shop", 3 | "version": "0.0.1", 4 | "scripts": { 5 | "build": "evidence build", 6 | "build:strict": "evidence build:strict", 7 | "dev": "evidence dev --host 0.0.0.0 --open /", 8 | "test": "evidence build", 9 | "help": "evidence --help" 10 | }, 11 | "engines": { 12 | "npm": ">=7.0.0", 13 | "node": ">=16.14.0" 14 | }, 15 | "type": "module", 16 | "dependencies": { 17 | "@evidence-dev/evidence": "15.0.1", 18 | "@evidence-dev/preprocess": "2.2.0", 19 | "@evidence-dev/components": "2.2.1" 20 | }, 21 | "overrides": { 22 | "jsonwebtoken": "9.0.0", 23 | "trim@<0.0.3": ">0.0.3", 24 | "sqlite3": "5.1.5" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/analysis/seasonality-investigation.md: -------------------------------------------------------------------------------- 1 | # Seasonality Investigation 2 | *Written by Melissa Cranston in September 2017* 3 | 4 | *Analysis covers the time period of September 2016 to August 2017. All queries have been limited to that range.* 5 | 6 | [Jump to conclusions & recommendations ↓](#conclusions) 7 | 8 | ## Variations in Order Volume 9 | Plotting orders per day for the last 12 months reveals 3 things: 10 | - An unnaturally large jump in orders per day in March 2017 - this was driven by the new store opening in [Brooklyn](/stores/Brooklyn) 11 | - A repeating pattern of spikes which might be driven by different order volumes on specific days of the week 12 | - A drop in total orders per day around June 2017 13 | 14 | ```orders_per_day 15 | select 16 | date_trunc('day', ordered_at) as date, 17 | count(*) as orders 18 | 19 | from analytics.orders 20 | where ordered_at between '2016-09-01' and '2017-08-31' 21 | 22 | group by 1 23 | order by 1 24 | ``` 25 | 26 | <LineChart 27 | data={orders_per_day} 28 | x=date 29 | y=orders 30 | xAxisTitle=true 31 | yAxisTitle=true 32 | /> 33 | 34 | ## Day of Week 35 | We can calculate average orders by day of week to check if there are differences in order volume across days. 36 | 37 | ```orders_by_weekday 38 | select 39 | date_part('dayofweek', date) as day_of_week_num, 40 | dayname(date) as day_of_week, 41 | avg(orders) as avg_orders 42 | from ${orders_per_day} 43 | group by 1, 2 44 | order by day_of_week_num 45 | ``` 46 | 47 | <BarChart 48 | data={orders_by_weekday} 49 | x=day_of_week 50 | y=avg_orders 51 | sort=false 52 | xAxisTitle=true 53 | yAxisTitle=true 54 | /> 55 | 56 | This reveals that weekdays generate significantly higher order volume than weekends. It also shows that orders are fairly consistent across individual days on weekdays (202-209 orders/day) and weekends (~50 orders/day). 57 | 58 | ## Hour of Day 59 | Now we'll break down orders by hour of day to see if there are patterns within days. Given the differences we just found between weekday and weekend volumes, we should split the results by those day types. We can use a loop for this.
60 | 61 | ```orders_hour_of_day 62 | with 63 | orders_by_hour as ( 64 | select 65 | date_part('hour', ordered_at) as hour_of_day, 66 | if(dayname(ordered_at) in ('Sunday', 'Saturday'), 'Weekend', 'Weekday') as day_type, 67 | count(*)::float as orders, 68 | count(distinct date_trunc('day', ordered_at)) as days 69 | from analytics.orders 70 | where ordered_at between '2016-09-01' and '2017-08-31' 71 | group by 1, 2 72 | order by hour_of_day 73 | ) 74 | 75 | select 76 | *, 77 | orders / days as orders_per_hour 78 | from orders_by_hour 79 | ``` 80 | 81 | {#each ['Weekday', 'Weekend'] as day_type} 82 | 83 | <LineChart 84 | data={orders_hour_of_day.filter(d => d.day_type === day_type)} 85 | x=hour_of_day 86 | y=orders_per_hour 87 | yAxisTitle=true 88 | xAxisTitle=true 89 | yMax=60 90 | title="{day_type} - Orders by Hour of Day" 91 | /> 92 | 93 | {/each} 94 | 95 | We see a significant peak in order volume between 7 and 9am on weekdays. There is also a slight increase in volume around lunch times (12-2pm) across all days of the week. 96 | 97 | ## Dayparts 98 | Based on the volumes shown above, we can break down our dayparts as: 99 | - Breakfast: 7-9am 100 | - Late Morning: 9am-12pm 101 | - Lunch: 12-2pm 102 | - Late Afternoon: 2-5pm 103 | 104 | In future analyses, these timeframes should be lined up with any existing operational timeframes (e.g., breakfast, lunch service windows). 105 | 106 | ```dayparts 107 | with 108 | orders_add_daypart as ( 109 | select 110 | *, 111 | case 112 | when hour_of_day between 7 and 8 then 'Breakfast' 113 | when hour_of_day between 9 and 11 then 'Late Morning' 114 | when hour_of_day between 12 and 14 then 'Lunch' 115 | when hour_of_day between 15 and 24 then 'Late Afternoon' 116 | end as daypart 117 | from ${orders_hour_of_day} 118 | ), 119 | 120 | orders_by_daypart as ( 121 | select 122 | daypart, 123 | day_type, 124 | sum(orders) / sum(days) as orders_per_hour, 125 | sum(orders) as orders 126 | from orders_add_daypart 127 | group by daypart, day_type 128 | ) 129 | 130 | select 131 | *, 132 | orders / sum(orders) over () as orders_pct1 133 | from orders_by_daypart 134 | ``` 135 | 136 | <BarChart 137 | data={dayparts} 138 | x=daypart 139 | y=orders_pct1 140 | series=day_type 141 | swapXY=true 142 | xAxisTitle=true 143 | yAxisTitle=true 144 | /> 145 | 146 | Almost half of all orders are generated from breakfast on weekdays. This might be driven by orders from customers who are on their way to work - a follow-up analysis on customer purchasing behaviour should be completed to investigate this.
147 | 148 | ## Conclusions 149 | - Weekdays generate significantly more orders than weekend days (~4x more orders on an average weekday compared to an average weekend day) 150 | - Early mornings (7-9am) on weekdays generate almost half of all orders for the company 151 | - There was a drop in orders in June 2017 - this has not been covered in this analysis, but should be investigated 152 | 153 | ### Recommended Follow-on Analyses 154 | - Investigate drop in orders in June 2017 155 | - Study customer purchasing behaviour, especially during weekday early mornings 156 | - Extend this analysis with a longer timeframe to investigate seasonality throughout the calendar year 157 | 158 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/customers/[customer].md: -------------------------------------------------------------------------------- 1 | # {$page.params.customer}'s Customer Profile 2 | 3 | ```customers 4 | select 5 | *, 6 | first_ordered_at as first_order_longdate, 7 | last_ordered_at as last_order_longdate, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | ``` 12 | 13 | {$page.params.customer} has been a customer since <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=first_order_longdate/>, with their most recent order occurring on <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=last_order_longdate/>. 14 | 15 | ### Key stats: 16 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=count_lifetime_orders/> lifetime orders 17 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=lifetime_spend_usd/> in lifetime spend 18 | - <Value data={customers.filter(d => d.customer_name === $page.params.customer)} column=average_order_value_usd/> average order value 19 | 20 | ```monthly_purchases 21 | select 22 | date_trunc('month', a.ordered_at) as month, 23 | b.customer_name, 24 | sum(a.order_total) as purchases_usd 25 | from analytics.orders a 26 | left join analytics.customers b 27 | on a.customer_id = b.customer_id 28 | group by month, customer_name 29 | order by month asc 30 | ``` 31 | 32 | <LineChart 33 | data={monthly_purchases.filter(d => d.customer_name === $page.params.customer)} 34 | x=month 35 | y=purchases_usd 36 | title="Purchases per Month by {$page.params.customer}" 37 | /> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/customers/index.md: -------------------------------------------------------------------------------- 1 | # Customers 2 | 3 | ```customers 4 | select 5 | customer_name, 6 | concat('/customers/', customer_name) as customer_link, 7 | count_lifetime_orders as lifetime_orders, 8 | lifetime_spend as lifetime_spend_usd, 9 | lifetime_spend / count_lifetime_orders as average_order_value_usd 10 | from analytics.customers 11 | order by lifetime_spend_usd desc 12 | ``` 13 | 14 | Click a row to see the report for that customer: 15 | <DataTable data={customers} link=customer_link> 16 | <Column id=customer_name/> 17 | <Column id=lifetime_orders/> 18 | <Column id=lifetime_spend_usd/> 19 | <Column id=average_order_value_usd/> 20 | </DataTable> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Jaffle Shop 🥪 2 | 3 | ```monthly_stats 4 | with 5 | monthly_stats as ( 6 | select 7 | date_trunc('month', ordered_at) as month, 8 | sum(order_total) as revenue_usd1k, 9 | count(*)::float as orders, 10 | count(distinct customer_id)::float as customers 11 | 12 | from analytics.orders 13 | group by month 14 | order by month desc 15 | ) 16 | 17 | select 18 | *,
19 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 20 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 21 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 22 | monthname(month) as month_name 23 | from monthly_stats 24 | ``` 25 | 26 | <BigValue 27 | data={monthly_stats} 28 | value=revenue_usd1k 29 | comparison=revenue_growth_pct1 30 | title="Monthly Revenue" 31 | comparisonTitle="vs. prev. month" 32 | /> 33 | 34 | <BigValue 35 | data={monthly_stats} 36 | value=orders 37 | comparison=order_growth_pct1 38 | title="Monthly Orders" 39 | comparisonTitle="vs. prev. month" 40 | /> 41 | 42 | Jaffle Shop locations served <Value data={monthly_stats} column=customers/> happy customers in <Value data={monthly_stats} column=month_name/>. This was a change of <Value data={monthly_stats} column=customer_growth_pct1/> from <Value data={monthly_stats} column=month_name row=1/>. 43 | 44 | ## Store Openings 45 | 46 | ```store_opening 47 | with 48 | most_recent_open as ( 49 | select 50 | location_name as opened_store, 51 | min(ordered_at) as opened_date_mmmyyyy, 52 | sum(order_total) as opened_revenue_usd 53 | from analytics.orders 54 | group by location_name 55 | order by opened_date_mmmyyyy desc 56 | limit 1 57 | ), 58 | 59 | company_total as ( 60 | select 61 | sum(order_total) as company_revenue_usd, 62 | from analytics.orders 63 | cross join most_recent_open 64 | where ordered_at >= opened_date_mmmyyyy 65 | ) 66 | 67 | select 68 | *, 69 | opened_revenue_usd / company_revenue_usd as revenue_pct 70 | from most_recent_open 71 | cross join company_total 72 | ``` 73 | 74 | The most recent Jaffle Shop store opening was in <Value data={store_opening} column=opened_date_mmmyyyy/>. Since opening, <Value data={store_opening} column=opened_store/> has contributed <Value data={store_opening} column=revenue_pct/> of total company sales. 75 | 76 | ```orders_per_week 77 | select 78 | date_trunc('week', ordered_at) as week, 79 | location_name, 80 | count(*) as orders, 81 | sum(order_total) as revenue_usd 82 | 83 | from analytics.orders 84 | 85 | group by 1,2 86 | order by 1 87 | ``` 88 | 89 | <LineChart 90 | data={orders_per_week} 91 | x=week 92 | y=orders 93 | series=location_name 94 | xAxisTitle=true 95 | yAxisTitle="orders per week" 96 | title="Orders Per Week by Location" 97 | /> 98 | 99 | ```revenue_per_city 100 | select 101 | location_name as city, 102 | concat('/stores/', location_name) as store_link, 103 | count(distinct customer_id) as customers, 104 | count(*) as orders, 105 | sum(order_total) as revenue_usd 106 | 107 | from analytics.orders 108 | 109 | group by 1, 2 110 | ``` 111 | 112 | ## Reports on Individual Stores 113 | Click a row to see the report for that store: 114 | 115 | <DataTable data={revenue_per_city} link=store_link/> 116 | ## Seasonality 117 | See [Seasonality Investigation](/analysis/seasonality-investigation) for more information. 118 | 119 | ## Customers 120 | To see individual customer purchase history, see [Customers](/customers) 121 | 122 | ### Customer Cohorts 123 | Average order values are tracked using monthly cohorts, which are created by truncating `first_order_date` to month. 124 | 125 | ```customers_with_cohort 126 | select 127 | *, 128 | date_trunc('month', first_ordered_at) as cohort_month, 129 | lifetime_spend_pretax / count_lifetime_orders as average_order_value_usd0 130 | 131 | from analytics.customers 132 | ``` 133 | 134 | ```cohorts_aov 135 | select 136 | cohort_month, 137 | avg(average_order_value_usd0) as cohort_aov_usd 138 | 139 | from ${customers_with_cohort} 140 | 141 | group by 1 142 | order by cohort_month 143 | ``` 144 | 145 | <BarChart 146 | data={cohorts_aov} 147 | x=cohort_month 148 | y=cohort_aov_usd 149 | xAxisTitle=true 150 | yAxisTitle=true 151 | title="Average Order Value by Cohort Month" 152 | /> 153 | 154 | ### Average Order Values 155 | 156 | <ScatterPlot 157 | data={customers_with_cohort} 158 | x=cohort_month 159 | y=average_order_value_usd0 160 | xAxisTitle=true 161 | yAxisTitle=true 162 | /> 163 | -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/stores/[city].md: -------------------------------------------------------------------------------- 1 | # Jaffle Shop {$page.params.city} 🥪 2 | 3 | ```opening 4 | select 5 | location_name, 6 | min(month) as opened_month_mmmyyyy 7 | from ${monthly_stats} 8 | group by location_name 9 | order by opened_month_mmmyyyy desc 10 | ``` 11 | 12 | {#if opening[0].location_name === $page.params.city} 13 | 14 | {$page.params.city} is the most recent store opening for Jaffle Shop, opened in <Value data={opening.filter(d => d.location_name === $page.params.city)} column=opened_month_mmmyyyy />.
15 | 16 | {:else} 17 | 18 | The {$page.params.city} location was opened in <Value data={opening.filter(d => d.location_name === $page.params.city)} column=opened_month_mmmyyyy />. 19 | 20 | {/if} 21 | 22 | ```monthly_stats 23 | with 24 | monthly_stats as ( 25 | select 26 | date_trunc('month', ordered_at) as month, 27 | location_name, 28 | sum(order_total) as revenue_usd1k, 29 | count(*)::float as orders, 30 | count(distinct customer_id)::float as customers 31 | 32 | from analytics.orders 33 | group by month, location_name 34 | order by month desc 35 | ) 36 | 37 | select 38 | *, 39 | revenue_usd1k / (lag(revenue_usd1k, -1) over (order by month desc)) - 1 as revenue_growth_pct1, 40 | orders / (lag(orders, -1) over (order by month desc)) - 1 as order_growth_pct1, 41 | customers / (lag(customers, -1) over (order by month desc)) - 1 as customer_growth_pct1, 42 | monthname(month) as month_name 43 | from monthly_stats 44 | ``` 45 | 46 | <BigValue 47 | data={monthly_stats.filter(data => data.location_name === $page.params.city)} 48 | value=revenue_usd1k 49 | comparison=revenue_growth_pct1 50 | title="Monthly Revenue" 51 | comparisonTitle="vs. prev. month" 52 | /> 53 | 54 | <BigValue 55 | data={monthly_stats.filter(data => data.location_name === $page.params.city)} 56 | value=orders 57 | comparison=order_growth_pct1 58 | title="Monthly Orders" 59 | comparisonTitle="vs. prev. month" 60 | /> 61 | 62 | Jaffle Shop {$page.params.city} served <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=customers/> happy customers in <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=month_name/>. This was a change of <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=customer_growth_pct1/> from <Value data={monthly_stats.filter(d => d.location_name === $page.params.city)} column=month_name row=1/>. 63 | 64 | ```orders_per_week 65 | select 66 | location_name as city, 67 | date_trunc('week', ordered_at) as week, 68 | count(*) as orders 69 | 70 | from analytics.orders 71 | 72 | group by 1, 2 73 | order by 1, 2 74 | ``` 75 | 76 | ## Orders Per Week in {$page.params.city} 77 | 78 | <LineChart 79 | data={orders_per_week.filter(data => data.city === $page.params.city)} 80 | x=week 81 | y=orders 82 | yAxisTitle="orders per week in {$page.params.city}" 83 | /> -------------------------------------------------------------------------------- /test_projects/revenue/reports/pages/stores/index.md: -------------------------------------------------------------------------------- 1 | # Stores 2 | 3 | ```revenue_per_city 4 | select 5 | location_name as city, 6 | concat('/stores/', location_name) as store_link, 7 | count(distinct customer_id) as customers, 8 | count(*) as orders, 9 | sum(order_total) as revenue_usd 10 | 11 | from analytics.orders 12 | 13 | group by 1, 2 14 | ``` 15 | 16 | Click a row to see the report for that store: 17 | <DataTable data={revenue_per_city} link=store_link/> -------------------------------------------------------------------------------- /test_projects/revenue/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-postgres==1.6.0-b8 2 | jafgen~=0.3.1 3 | pre-commit~=3.0.4 4 | sqlfluff-templater-dbt~=2.0.0a5 5 | sqlfluff~=2.0.0a5 -------------------------------------------------------------------------------- /test_projects/revenue/seeds/__seeds.yml: -------------------------------------------------------------------------------- 1 | seeds: 2 | - name: integers 3 | config: 4 | # `access` is set under `config` here to support dbt-core 1.6.x. Note that 5 | # the latest 1.7.x releases do not allow setting it both here and at the top level.
      access: public

  - name: seed_accounts
    config:
      access: private

--------------------------------------------------------------------------------
/test_projects/revenue/seeds/integers.csv:
--------------------------------------------------------------------------------

id
1
2
3
4

--------------------------------------------------------------------------------
/test_projects/revenue/seeds/seed_accounts.csv:
--------------------------------------------------------------------------------

name
foo
bar
baz

--------------------------------------------------------------------------------
/test_projects/revenue/snapshots/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/snapshots/.gitkeep

--------------------------------------------------------------------------------
/test_projects/revenue/tests/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholasyager/dbt-loom/114df17ee065dc7d40ad7798cf02a8990539ff6d/test_projects/revenue/tests/.gitkeep

--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------

def test_dbt_loom_injects_model():
    """Test if a dbt-loom model is injected into a dbt context."""
    pass

--------------------------------------------------------------------------------
/tests/test_dbt_core_execution.py:
--------------------------------------------------------------------------------

import os
from pathlib import Path

import dbt
import dbt.exceptions
from dbt.cli.main import dbtRunner, dbtRunnerResult

starting_path = os.getcwd()


def test_dbt_core_runs_loom_plugin():
    """Verify that dbt-core runs the dbt-loom plugin and nodes are injected."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    runner.invoke(["compile"])

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    # Make sure nothing failed
    assert output.exception is None

    output: dbtRunnerResult = runner.invoke(["ls"])

    # Make sure nothing failed
    assert output.exception is None

    # Check for injection
    assert isinstance(output.result, list)

    # Check that the versioned models work.
    subset = {
        "revenue.orders.v1",
        "revenue.orders.v2",
    }

    # Excluded packages do not get injected and loaded into a manifest.
    assert not any(["dbt_project_evaluator" in item for item in output.result])

    os.chdir(starting_path)

    assert set(output.result).issuperset(
        subset
    ), "The child project is missing expected nodes. Check that injection still works."


def test_dbt_loom_injects_dependencies():
    """Verify that dbt-core runs the dbt-loom plugin and that it flags access violations."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output = runner.invoke(["compile"])

    assert output.exception is None, output.exception.get_message()  # type: ignore

    # Write a model that refs a protected node in the upstream revenue project.
    path = Path(
        f"{starting_path}/test_projects/customer_success/models/staging/stg_orders_enhanced.sql"
    )

    with open(path, "w") as file:
        file.write(
            """
            with
            upstream as (
                select * from {{ ref('revenue', 'stg_orders') }}
            )

            select * from upstream
            """
        )

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    path.unlink()

    os.chdir(starting_path)

    # Make sure the cross-project reference was rejected.
    assert isinstance(output.exception, dbt.exceptions.DbtReferenceError)


def test_dbt_loom_injects_groups():
    """Verify that dbt-core runs the dbt-loom plugin and that it flags group violations."""

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output = runner.invoke(["compile"])

    assert output.exception is None

    # Write a model that refs a private, grouped node in the upstream revenue project.
    path = Path(
        f"{starting_path}/test_projects/customer_success/models/marts/marketing_lists.sql"
    )

    with open(path, "w") as file:
        file.write(
            """
            with
            upstream as (
                select * from {{ ref('accounts') }}
            )

            select * from upstream
            """
        )

    # Run `build` in the customer_success project
    os.chdir(f"{starting_path}/test_projects/customer_success")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    output: dbtRunnerResult = runner.invoke(["build"])

    path.unlink()

    os.chdir(starting_path)

    # Make sure the group violation was rejected.
    assert isinstance(output.exception, dbt.exceptions.DbtReferenceError)


def test_dbt_core_telemetry_blocking():
    """Verify that dbt-loom prevents telemetry about itself from being sent."""
    import shutil

    runner = dbtRunner()

    # Compile the revenue project
    os.chdir(f"{starting_path}/test_projects/revenue")
    runner.invoke(["clean"])
    runner.invoke(["deps"])
    shutil.rmtree("logs")
    runner.invoke(["compile"])

    # Check that no plugin events were sent. This is important to verify that
    # telemetry blocking is working.
161 | with open("logs/dbt.log") as log_file: 162 | assert "plugin_get_nodes" not in log_file.read() 163 | 164 | os.chdir(starting_path) 165 | -------------------------------------------------------------------------------- /tests/test_mainfest_node.py: -------------------------------------------------------------------------------- 1 | from dbt_loom.manifests import ManifestNode 2 | 3 | 4 | try: 5 | from dbt.artifacts.resources.types import NodeType 6 | except ModuleNotFoundError: 7 | from dbt.node_types import NodeType # type: ignore 8 | 9 | 10 | def test_rewrite_resource_types(): 11 | """Confirm that resource types are rewritten if they are incorrect due to previous injections.""" 12 | 13 | node = { 14 | "unique_id": "seed.example.foo", 15 | "name": "foo", 16 | "package_name": "example", 17 | "schema": "bar", 18 | "resource_type": "model", 19 | } 20 | 21 | manifest_node = ManifestNode(**(node)) # type: ignore 22 | 23 | assert manifest_node.resource_type == NodeType.Seed 24 | -------------------------------------------------------------------------------- /tests/test_manifest_loaders.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from typing import Dict, Generator, Tuple 5 | from urllib.parse import urlparse 6 | 7 | import pytest 8 | from dbt_loom.config import ( 9 | FileReferenceConfig, 10 | ManifestReference, 11 | ManifestReferenceType, 12 | LoomConfigurationError, 13 | ) 14 | from dbt_loom.manifests import ManifestLoader, UnknownManifestPathType 15 | 16 | 17 | @pytest.fixture 18 | def example_file() -> Generator[Tuple[Path, Dict], None, None]: 19 | example_content = {"foo": "bar"} 20 | path = Path("example.json") 21 | with open(path, "w") as file: 22 | json.dump(example_content, file) 23 | yield path, example_content 24 | path.unlink() 25 | 26 | 27 | def test_load_from_local_filesystem_pass(example_file): 28 | """Test that ManifestLoader can load a local JSON file.""" 29 | 30 | path, example_content = example_file 31 | 32 | file_config = FileReferenceConfig( 33 | path=urlparse("file://" + str(Path(path).absolute())) 34 | ) 35 | 36 | output = ManifestLoader.load_from_local_filesystem(file_config) 37 | 38 | assert output == example_content 39 | 40 | 41 | def test_load_from_local_filesystem_local_path(example_file): 42 | """Test that ManifestLoader can load a local JSON file.""" 43 | 44 | path, example_content = example_file 45 | 46 | file_config = FileReferenceConfig(path=str(path)) # type: ignore 47 | 48 | output = ManifestLoader.load_from_local_filesystem(file_config) 49 | 50 | assert output == example_content 51 | 52 | 53 | def test_load_from_path_fails_invalid_scheme(example_file): 54 | """ 55 | est that ManifestLoader will raise the appropriate exception if an invalid 56 | scheme is applied. 
57 | """ 58 | 59 | file_config = FileReferenceConfig( 60 | path=urlparse("ftp://example.com/example.json"), 61 | ) # type: ignore 62 | 63 | with pytest.raises(UnknownManifestPathType): 64 | ManifestLoader.load_from_path(file_config) 65 | 66 | 67 | def test_load_from_remote_pass(example_file): 68 | """Test that ManifestLoader can load a remote JSON file via HTTP(S).""" 69 | 70 | _, example_content = example_file 71 | 72 | file_config = FileReferenceConfig( 73 | path=urlparse( 74 | "https://s3.us-east-2.amazonaws.com/com.nicholasyager.dbt-loom/example.json" 75 | ), 76 | ) 77 | 78 | output = ManifestLoader.load_from_http(file_config) 79 | 80 | assert output == example_content 81 | 82 | 83 | def test_manifest_loader_selection(example_file): 84 | """Confirm scheme parsing works for picking the manifest loader.""" 85 | _, example_content = example_file 86 | manifest_loader = ManifestLoader() 87 | 88 | file_config = FileReferenceConfig( 89 | path=urlparse( 90 | "https://s3.us-east-2.amazonaws.com/com.nicholasyager.dbt-loom/example.json" 91 | ), 92 | ) 93 | 94 | manifest_reference = ManifestReference( 95 | name="example", type=ManifestReferenceType.file, config=file_config 96 | ) 97 | 98 | manifest = manifest_loader.load(manifest_reference) 99 | 100 | assert manifest == example_content 101 | 102 | 103 | def test_load_from_local_filesystem_optional_missing(): 104 | """If the manifest file does not exist, it should not raise an error if optional=True.""" 105 | file_config = FileReferenceConfig( 106 | path="not_exist_manifest.json" 107 | ) 108 | manifest_reference = ManifestReference( 109 | name="missing", 110 | type=ManifestReferenceType.file, 111 | config=file_config, 112 | optional=True, 113 | ) 114 | manifest_loader = ManifestLoader() 115 | manifest = manifest_loader.load(manifest_reference) 116 | assert manifest is None 117 | 118 | 119 | def test_load_from_local_filesystem_not_optional_missing(): 120 | """If the manifest file does not exist, it should raise an error if optional=False.""" 121 | file_config = FileReferenceConfig( 122 | path="not_exist_manifest.json" 123 | ) 124 | manifest_reference = ManifestReference( 125 | name="missing", 126 | type=ManifestReferenceType.file, 127 | config=file_config, 128 | optional=False, 129 | ) 130 | manifest_loader = ManifestLoader() 131 | with pytest.raises(LoomConfigurationError): 132 | manifest_loader.load(manifest_reference) 133 | --------------------------------------------------------------------------------