├── .github
├── dependabot.yml
└── workflows
│ ├── deploy-documentation.yml
│ ├── release-test.yml
│ ├── release.yml
│ ├── tests-dbt-version.yml
│ └── tests.yml
├── .gitignore
├── .idea
├── .gitignore
├── git_toolbox_blame.xml
├── git_toolbox_prj.xml
├── misc.xml
├── modules.xml
├── opendbt.iml
├── runConfigurations
│ ├── Python_tests_in_tests.xml
│ ├── pip_install.xml
│ └── pylint.xml
└── vcs.xml
├── .pylintrc
├── LICENSE
├── README.md
├── docs
├── assets
│ ├── airflow-dbt-docs-link.png
│ ├── airflow-dbt-docs-page.png
│ ├── airflow-dbt-flow.png
│ ├── dbt-custom-adapter-python.png
│ ├── dbt-local-python.png
│ ├── docs-columns-transformation.png
│ ├── docs-dependencies.png
│ ├── docs-lineage.png
│ ├── docs-run-info-error.png
│ ├── docs-run-info.png
│ └── opendbt-airflow-ui.png
├── catalog.md
├── examples.md
├── index.md
└── opendbtdocs
│ ├── catalog.json
│ ├── catalogl.json
│ ├── index.html
│ ├── manifest.json
│ ├── run_info.json
│ └── run_results.json
├── mkdocs.yml
├── opendbt
├── __init__.py
├── __main__.py
├── airflow
│ ├── __init__.py
│ └── plugin.py
├── catalog
│ └── __init__.py
├── dbt
│ ├── __init__.py
│ ├── docs
│ │ ├── .gitignore
│ │ └── index.html
│ ├── shared
│ │ ├── __init__.py
│ │ ├── adapters
│ │ │ ├── __init__.py
│ │ │ └── impl.py
│ │ ├── cli
│ │ │ ├── __init__.py
│ │ │ └── main.py
│ │ └── task
│ │ │ ├── __init__.py
│ │ │ └── sqlfluff.py
│ ├── v17
│ │ ├── __init__.py
│ │ ├── adapters
│ │ │ ├── __init__.py
│ │ │ └── factory.py
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ └── runtime.py
│ │ └── task
│ │ │ ├── __init__.py
│ │ │ ├── docs
│ │ │ ├── __init__.py
│ │ │ └── generate.py
│ │ │ └── run.py
│ └── v18
│ │ ├── __init__.py
│ │ ├── adapters
│ │ ├── __init__.py
│ │ └── factory.py
│ │ ├── artifacts
│ │ ├── __init__.py
│ │ └── schemas
│ │ │ ├── __init__.py
│ │ │ └── run.py
│ │ ├── config
│ │ ├── __init__.py
│ │ └── runtime.py
│ │ └── task
│ │ ├── __init__.py
│ │ ├── docs
│ │ ├── __init__.py
│ │ └── generate.py
│ │ └── run.py
├── examples.py
├── logger.py
├── macros
│ ├── executedlt.sql
│ ├── executepython.sql
│ └── executesql.sql
├── runtime_patcher.py
└── utils.py
├── pyproject.toml
└── tests
├── base_dbt_test.py
├── resources
├── airflow
│ ├── Dockerfile
│ ├── airflow
│ │ ├── airflow.cfg
│ │ └── webserver_config.py
│ ├── dags
│ │ ├── dbt_mesh_workflow.py
│ │ ├── dbt_tests_workflow.py
│ │ └── dbt_workflow.py
│ ├── docker-compose.yaml
│ └── plugins
│ │ └── airflow_dbtdocs_page.py
├── dbtcore
│ ├── .gitignore
│ ├── .sqlfluff
│ ├── dbt_project.yml
│ ├── models
│ │ ├── my_core_table1.sql
│ │ ├── my_executedlt_model.py
│ │ ├── my_executepython_dlt_model.py
│ │ ├── my_executepython_model.py
│ │ ├── my_executesql_dbt_model.sql
│ │ ├── my_failing_dbt_model.sql
│ │ ├── my_first_dbt_model.sql
│ │ ├── my_second_dbt_model.sql
│ │ └── schema.yml
│ └── profiles.yml
└── dbtfinance
│ ├── .gitignore
│ ├── dbt_project.yml
│ ├── dependencies.yml
│ ├── macros
│ └── generate_schema_name.sql
│ ├── models
│ ├── my_cross_project_ref_model.sql
│ └── sources.yml
│ └── profiles.yml
├── test_airflow.py
├── test_catalog.py
├── test_custom_adapter.py
├── test_dbt_docs.py
├── test_dbt_sqlfluff.py
├── test_executedlt_materialization.py
├── test_executepython_materialization.py
├── test_executesql_materialization.py
├── test_main.py
├── test_opendbt_airflow.py
├── test_opendbt_cli.py
├── test_opendbt_mesh.py
└── test_opendbt_project.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 | - package-ecosystem: "pip"
8 | directory: "/"
9 | schedule:
10 | interval: "weekly"
11 |
--------------------------------------------------------------------------------
/.github/workflows/deploy-documentation.yml:
--------------------------------------------------------------------------------
1 | name: deploy-mkdocs-documentation
2 | on:
3 | push:
4 | branches:
5 | - master
6 | - main
7 | - docs
8 | permissions:
9 | contents: write
10 | jobs:
11 | deploy:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v4
15 | - name: Configure Git Credentials
16 | run: |
17 | git config user.name github-actions[bot]
18 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com
19 | - uses: actions/setup-python@v5
20 | with:
21 | python-version: 3.x
22 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
23 | - run: pip install mkdocs-material
24 | - run: mkdocs gh-deploy --force
--------------------------------------------------------------------------------
/.github/workflows/release-test.yml:
--------------------------------------------------------------------------------
1 | name: Create Test Pypi Release
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 |
7 | jobs:
8 | build:
9 | if: github.repository_owner == 'memiiso'
10 | runs-on: ubuntu-latest
11 | strategy:
12 | matrix:
13 | python-version: [ 3.8 ]
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 | - name: Install pypa/build
22 | run: |
23 | python -m pip install build --user
24 | - name: Build a binary wheel and a source tarball
25 | run: |
26 | python -m build --sdist --wheel --outdir dist/ .
27 |
28 | - name: Publish main to Test Pypi
29 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/heads/main')
30 | uses: pypa/gh-action-pypi-publish@release/v1
31 | with:
32 | user: __token__
33 | password: ${{ secrets.TEST_PYPI_API_TOKEN }}
34 | repository_url: https://test.pypi.org/legacy/
35 | skip_existing: true
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Create Pypi Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - '*.*.*'
7 |
8 | jobs:
9 | build:
10 | if: github.repository_owner == 'memiiso'
11 | runs-on: ubuntu-latest
12 | strategy:
13 | matrix:
14 | python-version: [ 3.8 ]
15 |
16 | steps:
17 | - uses: actions/checkout@v4
18 | - name: Set up Python ${{ matrix.python-version }}
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 | - name: Install pypa/build
23 | run: |
24 | python -m pip install build --user
25 | - name: Build a binary wheel and a source tarball
26 | run: |
27 | python -m build --sdist --wheel --outdir dist/ .
28 |
29 | - name: Publish to Pypi
30 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
31 | uses: pypa/gh-action-pypi-publish@release/v1
32 | with:
33 | user: __token__
34 | password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/tests-dbt-version.yml:
--------------------------------------------------------------------------------
1 | name: Build and Test DBT Version
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | dbt-version:
7 | required: true
8 | type: string
9 |
10 | jobs:
11 | test-dbt-version:
12 | runs-on: macos-latest
13 | strategy:
14 | fail-fast: false
15 | matrix:
16 | python-version: [ "3.9", "3.10", "3.11", "3.12" ]
17 | steps:
18 | - uses: actions/checkout@v4
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v5
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 | cache: 'pip' # caching pip dependencies
24 | - name: Build & Install DBT ${{ inputs.dbt-version }}
25 | run: |
26 | pip install -q coverage pylint
27 | pip install -q dbt-core==${{ inputs.dbt-version }}.* dbt-duckdb==${{ inputs.dbt-version }}.* --force-reinstall --upgrade
28 | # FIX for protobuf issue: https://github.com/dbt-labs/dbt-core/issues/9759
29 | pip install -q "apache-airflow<3.0.0" "protobuf>=4.25.3,<5.0.0" "opentelemetry-proto<1.28.0" --prefer-binary
30 | pip install -q .[test] --prefer-binary
31 | pip install -q dbt-core==${{ inputs.dbt-version }}.* dbt-duckdb==${{ inputs.dbt-version }}.* --force-reinstall --upgrade
32 | python --version
33 | python -c "from dbt.version import get_installed_version as get_dbt_version;print(f'dbt version={get_dbt_version()}')"
34 | python -m compileall -f opendbt
35 | python -m pylint opendbt
36 | - name: Run Tests
37 | run: |
38 | python -c "from dbt.version import get_installed_version as get_dbt_version;print(f'dbt version={get_dbt_version()}')"
39 | python -m coverage run --source=./tests/ -m unittest discover -s tests/
40 | python -m coverage report -m ./opendbt/*.py
41 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Build and Test
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches: [ main ]
7 | paths-ignore:
8 | - '.idea/**'
9 | - '.run/**'
10 | pull_request:
11 | branches: [ main ]
12 | paths-ignore:
13 | - '.idea/**'
14 | - '.run/**'
15 |
16 | jobs:
17 | test-dbt-1-7:
18 | uses: ./.github/workflows/tests-dbt-version.yml
19 | with:
20 | dbt-version: "1.7"
21 | needs: test-dbt-1-8
22 | test-dbt-1-8:
23 | uses: ./.github/workflows/tests-dbt-version.yml
24 | with:
25 | dbt-version: "1.8"
26 | needs: test-dbt-1-9
27 | test-dbt-1-9:
28 | uses: ./.github/workflows/tests-dbt-version.yml
29 | with:
30 | dbt-version: "1.9"
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | logs
2 | **.duckdb
3 | **.user.yml
4 | reset.sh
5 |
6 | ###### JetBrains ######
7 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
8 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
9 |
10 | # User-specific stuff
11 | .idea/**/workspace.xml
12 | .idea/**/tasks.xml
13 | .idea/**/usage.statistics.xml
14 | .idea/**/dictionaries
15 | .idea/**/shelf
16 |
17 | # AWS User-specific
18 | .idea/**/aws.xml
19 |
20 | # Generated files
21 | .idea/**/contentModel.xml
22 |
23 | # Sensitive or high-churn files
24 | .idea/**/dataSources/
25 | .idea/**/dataSources.ids
26 | .idea/**/dataSources.local.xml
27 | .idea/**/sqlDataSources.xml
28 | .idea/**/dynamic.xml
29 | .idea/**/uiDesigner.xml
30 | .idea/**/dbnavigator.xml
31 |
32 | # Gradle
33 | .idea/**/gradle.xml
34 | .idea/**/libraries
35 |
36 | # Gradle and Maven with auto-import
37 | # When using Gradle or Maven with auto-import, you should exclude module files,
38 | # since they will be recreated, and may cause churn. Uncomment if using
39 | # auto-import.
40 | # .idea/artifacts
41 | # .idea/compiler.xml
42 | # .idea/jarRepositories.xml
43 | # .idea/modules.xml
44 | # .idea/*.iml
45 | # .idea/modules
46 | # *.iml
47 | # *.ipr
48 |
49 | # CMake
50 | cmake-build-*/
51 |
52 | # Mongo Explorer plugin
53 | .idea/**/mongoSettings.xml
54 |
55 | # File-based project format
56 | *.iws
57 |
58 | # IntelliJ
59 | out/
60 |
61 | # mpeltonen/sbt-idea plugin
62 | .idea_modules/
63 |
64 | # JIRA plugin
65 | atlassian-ide-plugin.xml
66 |
67 | # Cursive Clojure plugin
68 | .idea/replstate.xml
69 |
70 | # SonarLint plugin
71 | .idea/sonarlint/
72 |
73 | # Crashlytics plugin (for Android Studio and IntelliJ)
74 | com_crashlytics_export_strings.xml
75 | crashlytics.properties
76 | crashlytics-build.properties
77 | fabric.properties
78 |
79 | # Editor-based Rest Client
80 | .idea/httpRequests
81 |
82 | # Android studio 3.1+ serialized cache file
83 | .idea/caches/build_file_checksums.ser
84 |
85 |
86 | ###### Python ######
87 | # Byte-compiled / optimized / DLL files
88 | __pycache__/
89 | *.py[cod]
90 | *$py.class
91 |
92 | # C extensions
93 | *.so
94 |
95 | # Distribution / packaging
96 | .Python
97 | build/
98 | develop-eggs/
99 | dist/
100 | downloads/
101 | eggs/
102 | .eggs/
103 | lib/
104 | lib64/
105 | parts/
106 | sdist/
107 | var/
108 | wheels/
109 | share/python-wheels/
110 | *.egg-info/
111 | .installed.cfg
112 | *.egg
113 | MANIFEST
114 |
115 | # PyInstaller
116 | # Usually these files are written by a python script from a template
117 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
118 | *.manifest
119 | *.spec
120 |
121 | # Installer logs
122 | pip-log.txt
123 | pip-delete-this-directory.txt
124 |
125 | # Unit test / coverage reports
126 | htmlcov/
127 | .tox/
128 | .nox/
129 | .coverage
130 | .coverage.*
131 | .cache
132 | nosetests.xml
133 | coverage.xml
134 | *.cover
135 | *.py,cover
136 | .hypothesis/
137 | .pytest_cache/
138 | cover/
139 |
140 | # Translations
141 | *.mo
142 | *.pot
143 |
144 | # Django stuff:
145 | *.log
146 | local_settings.py
147 | db.sqlite3
148 | db.sqlite3-journal
149 |
150 | # Flask stuff:
151 | instance/
152 | .webassets-cache
153 |
154 | # Scrapy stuff:
155 | .scrapy
156 |
157 | # Sphinx documentation
158 | docs/_build/
159 |
160 | # PyBuilder
161 | .pybuilder/
162 | target/
163 |
164 | # Jupyter Notebook
165 | .ipynb_checkpoints
166 |
167 | # IPython
168 | profile_default/
169 | ipython_config.py
170 |
171 | # pyenv
172 | # For a library or package, you might want to ignore these files since the code is
173 | # intended to run in multiple environments; otherwise, check them in:
174 | # .python-version
175 |
176 | # pipenv
177 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
178 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
179 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
180 | # install all needed dependencies.
181 | #Pipfile.lock
182 |
183 | # poetry
184 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
185 | # This is especially recommended for binary packages to ensure reproducibility, and is more
186 | # commonly ignored for libraries.
187 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
188 | #poetry.lock
189 |
190 | # pdm
191 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
192 | #pdm.lock
193 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
194 | # in version control.
195 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
196 | .pdm.toml
197 | .pdm-python
198 | .pdm-build/
199 |
200 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
201 | __pypackages__/
202 |
203 | # Celery stuff
204 | celerybeat-schedule
205 | celerybeat.pid
206 |
207 | # SageMath parsed files
208 | *.sage.py
209 |
210 | # Environments
211 | .env
212 | .venv
213 | env/
214 | venv/
215 | ENV/
216 | env.bak/
217 | venv.bak/
218 |
219 | # Spyder project settings
220 | .spyderproject
221 | .spyproject
222 |
223 | # Rope project settings
224 | .ropeproject
225 |
226 | # mkdocs documentation
227 | /site
228 |
229 | # mypy
230 | .mypy_cache/
231 | .dmypy.json
232 | dmypy.json
233 |
234 | # Pyre type checker
235 | .pyre/
236 |
237 | # pytype static type analyzer
238 | .pytype/
239 |
240 | # Cython debug symbols
241 | cython_debug/
242 |
243 | # PyCharm
244 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
245 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
246 | # and can be added to the global gitignore or merged into this file. For a more nuclear
247 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
248 | #.idea/
249 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/git_toolbox_blame.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/git_toolbox_prj.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | EmbeddedPerformanceJava
13 |
14 |
15 | Error handlingJava
16 |
17 |
18 | Groovy
19 |
20 |
21 | InitializationJava
22 |
23 |
24 | JVM languages
25 |
26 |
27 | Java
28 |
29 |
30 | Java 21Java language level migration aidsJava
31 |
32 |
33 | Java language level migration aidsJava
34 |
35 |
36 | Kotlin
37 |
38 |
39 | LoggingJVM languages
40 |
41 |
42 | MemoryJava
43 |
44 |
45 | PerformanceJava
46 |
47 |
48 | Probable bugsJava
49 |
50 |
51 | Python
52 |
53 |
54 | Redundant constructsKotlin
55 |
56 |
57 | RegExp
58 |
59 |
60 | Style issuesKotlin
61 |
62 |
63 | Threading issuesGroovy
64 |
65 |
66 | Threading issuesJava
67 |
68 |
69 | Verbose or redundant code constructsJava
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/opendbt.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Python_tests_in_tests.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/pip_install.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/pylint.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 | disable=
3 | C, # convention
4 | W, # warnings
5 | import-error,
6 | no-name-in-module,
7 | too-many-arguments,
8 | too-many-positional-arguments,
9 | too-few-public-methods,
10 | no-member,
11 | unexpected-keyword-arg,
12 | R0801 # Similar lines in 2 files
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | [](http://www.apache.org/licenses/LICENSE-2.0.html)
3 | 
4 |
5 | # opendbt
6 |
7 | This project adds new capabilities to dbt-core by dynamically extending dbt's source code.
8 |
 9 | dbt is a popular solution for batch data processing in data analytics. However, it operates on
10 | an [open-core model](https://opencoreventures.com/blog/2023-07-open-core-is-misunderstood-business-model/), which can
11 | sometimes limit the inclusion of community features in the open-source version. opendbt addresses this concern with a
12 | fully open-source package. **opendbt builds upon dbt-core, adding valuable
13 | features without changing dbt-core code.**
14 |
15 | With `opendbt` you can go beyond the core functionality of dbt, for example by seamlessly integrating your customized
16 | adapter and exposing additional adapter/Python methods to the Jinja context.
17 |
18 | ## Features
19 |
20 | - :white_check_mark: Includes a superior [dbt catalog UI](https://memiiso.github.io/opendbt/opendbtdocs/): a user-friendly
21 | data catalog
22 | including row-level
23 | lineage, [see it here](https://memiiso.github.io/opendbt/opendbtdocs/)
24 | - :white_check_mark: Integrates Python and DLT jobs into dbt. Enables Extract & Load (EL) with dbt.
25 | - :white_check_mark: Supports dbt Mesh setups: run multiple projects that use cross-project ref
26 | models.
27 | - :white_check_mark: And many more features and customization options:
28 | - Customize existing adapters: add your custom logic to current adapters
29 | - Extend a current adapter to provide more functions to Jinja
30 | - Execute Local Python
31 | Code: [run local Python code](https://medium.com/@ismail-simsek/make-dbt-great-again-ec34f3b661f5). For example, you
32 | could import data from web APIs directly within your dbt model.
33 | - [Integrate DLT](https://github.com/memiiso/opendbt/issues/40): run end-to-end ETL pipelines with dbt and DLT.
34 | - [Use a multi-project dbt-mesh setup with cross-project references](https://docs.getdbt.com/docs/collaborate/govern/project-dependencies#how-to-write-cross-project-ref).
35 | - So far, this feature was only available in "dbt Cloud Enterprise".
36 | - Granular Model-Level Orchestration with Airflow: Integrate Airflow for fine-grained control over model execution.
37 | - Serve dbt Docs in Airflow UI: Create a custom page on the Airflow server that displays dbt documentation as an
38 | Airflow
39 | UI page.
40 | - Register [dbt callbacks](https://docs.getdbt.com/reference/programmatic-invocations#registering-callbacks) within a
41 | dbt project to trigger custom actions or alerting based on selected dbt events.
42 |
43 | [See the documentation for further details and examples](https://memiiso.github.io/opendbt/).
44 |
45 | 
46 |
47 | ## Installation
48 |
49 | Install from GitHub or PyPI:
50 |
51 | ```shell
52 | pip install opendbt==0.13.0
53 | # Or
54 | pip install https://github.com/memiiso/opendbt/archive/refs/tags/0.4.0.zip --upgrade --user
55 | ```
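
After installation, a typical programmatic invocation looks like the following (a minimal sketch; the project and profiles paths are placeholders, and `run` can be swapped for any other dbt command):

```python
from opendbt import OpenDbtProject

# Point opendbt at an existing dbt project and invoke dbt programmatically.
dp = OpenDbtProject(project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir")
dp.run(command="run")
```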
56 |
57 | ## **Your Contributions Matter**
58 |
59 | The project is completely open-source, using the Apache 2.0 license.
60 | opendbt is still a young project and there are things to improve.
61 | Please feel free to test it, give feedback, open feature requests, or send pull requests.
62 |
63 | ### Contributors
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-docs-link.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-docs-link.png
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-docs-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-docs-page.png
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-flow.png
--------------------------------------------------------------------------------
/docs/assets/dbt-custom-adapter-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/dbt-custom-adapter-python.png
--------------------------------------------------------------------------------
/docs/assets/dbt-local-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/dbt-local-python.png
--------------------------------------------------------------------------------
/docs/assets/docs-columns-transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-columns-transformation.png
--------------------------------------------------------------------------------
/docs/assets/docs-dependencies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-dependencies.png
--------------------------------------------------------------------------------
/docs/assets/docs-lineage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-lineage.png
--------------------------------------------------------------------------------
/docs/assets/docs-run-info-error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-run-info-error.png
--------------------------------------------------------------------------------
/docs/assets/docs-run-info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-run-info.png
--------------------------------------------------------------------------------
/docs/assets/opendbt-airflow-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/opendbt-airflow-ui.png
--------------------------------------------------------------------------------
/docs/catalog.md:
--------------------------------------------------------------------------------
1 | # Opendbt Catalog
2 |
3 | [See it in action](https://memiiso.github.io/opendbt/opendbtdocs/)
4 |
5 | Summary of the catalog files:
6 |
7 | - [catalog.json](catalog.json): Generated by dbt
 8 | - [catalogl.json](catalogl.json): Generated by opendbt; contains extended catalog information with column-level lineage
 9 | - [manifest.json](manifest.json): Generated by dbt
10 | - [run_info.json](run_info.json): Generated by opendbt; contains the latest run information per object/model
11 |
12 | ## Key Features
13 |
14 | ### Up-to-date run information
15 |
16 | 
17 |
18 | ### Run information with error messages
19 |
20 | 
21 |
22 | ### Model dependencies including tests
23 |
24 | 
25 |
26 | ### Column-level dependency lineage and transformations
27 |
28 | 
29 |
30 | ### Dependency lineage
31 |
32 | 
--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | ## Using dbt with User-Defined Adapters and Jinja Methods
4 |
5 | To add custom methods to an existing adapter and expose them to Jinja templates, follow these steps:
6 |
7 | **Step-1:** Extend the Adapter
8 | Create a new adapter class that inherits from the desired base adapter. Add the necessary methods to this class.
9 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/examples.py#L10-L26
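
For orientation, a minimal sketch of such a custom adapter is shown below. It assumes `dbt-duckdb` as the base adapter and uses dbt's `@available` decorator to expose the new method to Jinja; the method body is purely illustrative, not opendbt's actual implementation.

```python
import subprocess
import sys
import tempfile

from dbt.adapters.base import available
from dbt.adapters.duckdb import DuckDBAdapter


class DuckDBAdapterV2Custom(DuckDBAdapter):

    @available
    def submit_local_python_job(self, parsed_model: dict, compiled_code: str):
        # Illustrative only: write the compiled model code to a temp file and run it locally.
        with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as tmp:
            tmp.write(compiled_code)
            model_file = tmp.name
        subprocess.run([sys.executable, model_file], check=True)
```

Because the method is decorated with `@available`, Jinja macros can call it as `adapter.submit_local_python_job(...)`.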
10 |
11 | **Step-2:** In your `dbt_project.yml` file, set the `dbt_custom_adapter` variable to the fully qualified name of your
12 | custom adapter class. This will enable opendbt to recognize and activate your adapter.
13 | ```yml
14 | vars:
15 | dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom
16 | ```
17 |
18 | **Step-3:** Execute dbt commands as usual. dbt will now load and utilize your custom adapter class, allowing you to
19 | access the newly defined methods within your Jinja macros.
20 | ```python
21 | from opendbt import OpenDbtProject
22 |
23 | dp = OpenDbtProject(project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir")
24 | dp.run(command="run")
25 | ```
26 |
27 | ## Executing Python Models Locally with dbt
28 |
29 | By leveraging a customized adapter and a custom materialization, dbt can be extended to execute Python code locally.
30 | This powerful capability is particularly useful for scenarios involving data ingestion from external APIs, enabling
31 | seamless integration within the dbt framework.
32 |
33 | **Step-1:** We'll extend an existing adapter (like `DuckDBAdapter`) to add a new method, `submit_local_python_job`. This
34 | method will execute the provided Python code as a subprocess.
35 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/examples.py#L10-L26
36 |
37 | **Step-2:** Create a new materialization named `executepython`. This materialization will call the newly added
38 | `submit_local_python_job` method from the custom adapter to execute the compiled Python code.
39 |
40 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/macros/executepython.sql#L1-L26
41 |
42 | **Step-3:** Let's create a sample Python model that will be executed locally by dbt using the executepython
43 | materialization.
44 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/dbttest/models/my_executepython_dbt_model.py#L1-L22
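
A minimal sketch of such a model might look like this (assuming dbt's standard Python model entry point `model(dbt, session)`; the column names and values are illustrative):

```python
import pandas as pd


def model(dbt, session):
    # Route this model through the custom `executepython` materialization from Step-2.
    dbt.config(materialized="executepython")
    # Any local Python logic can run here, e.g. fetching data from a web API.
    return pd.DataFrame(
        {"event_id": [1, 2], "event_tstamp": pd.to_datetime(["2024-01-01", "2024-01-02"])}
    )
```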
45 |
46 | ## Orchestrating dbt Models with Airflow
47 |
48 | **Step-1:** Let's create an Airflow DAG to orchestrate the execution of your dbt project.
49 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/airflow/dags/dbt_workflow.py#L17-L32
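
A minimal sketch of such a DAG is shown below (assuming Airflow 2.x with `EmptyOperator` start/end nodes; the DAG id, schedule, and paths are placeholders):

```python
from pendulum import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator

from opendbt.airflow import OpenDbtAirflowProject

with DAG(dag_id="dbt_workflow", start_date=datetime(2024, 1, 1), schedule=None, catchup=False) as dag:
    start = EmptyOperator(task_id="start")
    end = EmptyOperator(task_id="end")

    # Build one Airflow task per dbt node and wire them between start and end.
    p = OpenDbtAirflowProject(project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir", target="dev")
    p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
```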
50 |
51 | 
52 |
53 | #### Creating an Airflow DAG that selectively executes a specific subset of models from your dbt project
54 |
55 | ```python
56 | from opendbt.airflow import OpenDbtAirflowProject
57 |
58 | # create dbt build tasks for models with given tag
59 | p = OpenDbtAirflowProject(resource_type='model', project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir",
60 | target='dev', tag="MY_TAG")
61 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
62 | ```
63 |
64 | #### Creating a DAG to run dbt tests
65 |
66 | ```python
67 | from opendbt.airflow import OpenDbtAirflowProject
68 |
69 | # create dbt test tasks with given model tag
70 | p = OpenDbtAirflowProject(resource_type='test', project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir",
71 | target='dev', tag="MY_TAG")
72 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
73 | ```
74 |
75 | ## Integrating dbt Documentation into Airflow
76 |
77 | Airflow, a powerful workflow orchestration tool, can be leveraged to streamline not only dbt execution but also dbt
78 | documentation access. By integrating dbt documentation into your Airflow interface, you can centralize your data
79 | engineering resources and improve team collaboration.
80 |
81 | Here is how:
82 | **Step-1:** Create a plugin file. Navigate to your Airflow `{airflow}/plugins` directory.
83 | Create a new Python file and name it appropriately, such as `dbt_docs_plugin.py`. Add the following code to the
84 | `dbt_docs_plugin.py` file.
85 | Ensure that the specified path accurately points to the folder where your dbt project generates its documentation.
86 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/airflow/plugins/airflow_dbtdocs_page.py#L1-L6
87 |
88 | **Step-2:** Restart Airflow to activate the plugin. Once the restart is complete, you should see a new link labeled
89 | `DBT Docs` within your Airflow web interface. This link will provide access to your dbt documentation.
90 | 
91 |
92 | **Step-3:** Click on the `DBT Docs` link to open your dbt documentation.
93 | 
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | --8<-- "README.md"
--------------------------------------------------------------------------------
/docs/opendbtdocs/catalog.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json",
4 | "dbt_version": "1.9.4",
5 | "generated_at": "2025-05-15T18:01:15.469569Z",
6 | "invocation_id": "4a81f777-5b15-45b3-8c88-b08d0f7bb9ea",
7 | "env": {}
8 | },
9 | "nodes": {
10 | "model.dbtcore.my_core_table1": {
11 | "metadata": {
12 | "type": "BASE TABLE",
13 | "schema": "core",
14 | "name": "my_core_table1",
15 | "database": "dev",
16 | "comment": null,
17 | "owner": null
18 | },
19 | "columns": {
20 | "id": {
21 | "type": "INTEGER",
22 | "index": 1,
23 | "name": "id",
24 | "comment": null
25 | },
26 | "row_data": {
27 | "type": "VARCHAR",
28 | "index": 2,
29 | "name": "row_data",
30 | "comment": null
31 | }
32 | },
33 | "stats": {
34 | "has_stats": {
35 | "id": "has_stats",
36 | "label": "Has Stats?",
37 | "value": false,
38 | "include": false,
39 | "description": "Indicates whether there are statistics for this table"
40 | }
41 | },
42 | "unique_id": "model.dbtcore.my_core_table1"
43 | },
44 | "model.dbtcore.my_executedlt_model": {
45 | "metadata": {
46 | "type": "BASE TABLE",
47 | "schema": "core",
48 | "name": "my_executedlt_model",
49 | "database": "dev",
50 | "comment": null,
51 | "owner": null
52 | },
53 | "columns": {
54 | "event_tstamp": {
55 | "type": "TIMESTAMP WITH TIME ZONE",
56 | "index": 1,
57 | "name": "event_tstamp",
58 | "comment": null
59 | },
60 | "event_id": {
61 | "type": "BIGINT",
62 | "index": 2,
63 | "name": "event_id",
64 | "comment": null
65 | },
66 | "_dlt_load_id": {
67 | "type": "VARCHAR",
68 | "index": 3,
69 | "name": "_dlt_load_id",
70 | "comment": null
71 | },
72 | "_dlt_id": {
73 | "type": "VARCHAR",
74 | "index": 4,
75 | "name": "_dlt_id",
76 | "comment": null
77 | },
78 | "event_tstamp__v_text": {
79 | "type": "VARCHAR",
80 | "index": 5,
81 | "name": "event_tstamp__v_text",
82 | "comment": null
83 | }
84 | },
85 | "stats": {
86 | "has_stats": {
87 | "id": "has_stats",
88 | "label": "Has Stats?",
89 | "value": false,
90 | "include": false,
91 | "description": "Indicates whether there are statistics for this table"
92 | }
93 | },
94 | "unique_id": "model.dbtcore.my_executedlt_model"
95 | },
96 | "model.dbtcore.my_first_dbt_model": {
97 | "metadata": {
98 | "type": "BASE TABLE",
99 | "schema": "core",
100 | "name": "my_first_dbt_model",
101 | "database": "dev",
102 | "comment": null,
103 | "owner": null
104 | },
105 | "columns": {
106 | "id": {
107 | "type": "INTEGER",
108 | "index": 1,
109 | "name": "id",
110 | "comment": null
111 | },
112 | "data_value": {
113 | "type": "VARCHAR",
114 | "index": 2,
115 | "name": "data_value",
116 | "comment": null
117 | },
118 | "column_3": {
119 | "type": "VARCHAR",
120 | "index": 3,
121 | "name": "column_3",
122 | "comment": null
123 | }
124 | },
125 | "stats": {
126 | "has_stats": {
127 | "id": "has_stats",
128 | "label": "Has Stats?",
129 | "value": false,
130 | "include": false,
131 | "description": "Indicates whether there are statistics for this table"
132 | }
133 | },
134 | "unique_id": "model.dbtcore.my_first_dbt_model"
135 | },
136 | "model.dbtcore.my_second_dbt_model": {
137 | "metadata": {
138 | "type": "BASE TABLE",
139 | "schema": "core",
140 | "name": "my_second_dbt_model",
141 | "database": "dev",
142 | "comment": null,
143 | "owner": null
144 | },
145 | "columns": {
146 | "pk_id": {
147 | "type": "INTEGER",
148 | "index": 1,
149 | "name": "pk_id",
150 | "comment": null
151 | },
152 | "data_value1": {
153 | "type": "VARCHAR",
154 | "index": 2,
155 | "name": "data_value1",
156 | "comment": null
157 | },
158 | "data_value2": {
159 | "type": "VARCHAR",
160 | "index": 3,
161 | "name": "data_value2",
162 | "comment": null
163 | },
164 | "event_tstamp": {
165 | "type": "TIMESTAMP WITH TIME ZONE",
166 | "index": 4,
167 | "name": "event_tstamp",
168 | "comment": null
169 | }
170 | },
171 | "stats": {
172 | "has_stats": {
173 | "id": "has_stats",
174 | "label": "Has Stats?",
175 | "value": false,
176 | "include": false,
177 | "description": "Indicates whether there are statistics for this table"
178 | }
179 | },
180 | "unique_id": "model.dbtcore.my_second_dbt_model"
181 | }
182 | },
183 | "sources": {},
184 | "errors": null
185 | }
--------------------------------------------------------------------------------
/docs/opendbtdocs/catalogl.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json",
4 | "dbt_version": "1.9.4",
5 | "generated_at": "2025-05-15T18:01:15.469569Z",
6 | "invocation_id": "4a81f777-5b15-45b3-8c88-b08d0f7bb9ea",
7 | "env": {}
8 | },
9 | "nodes": {
10 | "model.dbtfinance.my_cross_project_ref_model": {
11 | "stats": {},
12 | "columns": {
13 | "id": {
14 | "name": "id",
15 | "type": "unknown",
16 | "column_fqn": "dev.finance.my_cross_project_ref_model.id",
17 | "table_fqn": "dev.finance.my_cross_project_ref_model",
18 | "table_relative_fqn": "finance.my_cross_project_ref_model",
19 | "transformations": [],
20 | "depends_on": []
21 | },
22 | "row_data": {
23 | "name": "row_data",
24 | "type": "unknown",
25 | "column_fqn": "dev.finance.my_cross_project_ref_model.row_data",
26 | "table_fqn": "dev.finance.my_cross_project_ref_model",
27 | "table_relative_fqn": "finance.my_cross_project_ref_model",
28 | "transformations": [],
29 | "depends_on": []
30 | },
31 | "num_rows": {
32 | "name": "num_rows",
33 | "type": "unknown",
34 | "column_fqn": "dev.finance.my_cross_project_ref_model.num_rows",
35 | "table_fqn": "dev.finance.my_cross_project_ref_model",
36 | "table_relative_fqn": "finance.my_cross_project_ref_model",
37 | "transformations": [],
38 | "depends_on": []
39 | }
40 | },
41 | "metadata": {}
42 | },
43 | "model.dbtcore.my_first_dbt_model": {
44 | "stats": {},
45 | "columns": {
46 | "data_value": {
47 | "name": "data_value",
48 | "description": "",
49 | "meta": {},
50 | "data_type": null,
51 | "constraints": [],
52 | "quote": null,
53 | "tags": [],
54 | "granularity": null,
55 | "type": "unknown",
56 | "column_fqn": "dev.core.my_first_dbt_model.data_value",
57 | "table_fqn": "dev.core.my_first_dbt_model",
58 | "table_relative_fqn": "core.my_first_dbt_model",
59 | "transformations": [
60 | "'test-value' AS data_value",
61 | "'test-value' AS data_value",
62 | "'test-value' AS data_value",
63 | "'test-value' AS data_value",
64 | "source_data.data_value AS data_value"
65 | ],
66 | "depends_on": []
67 | },
68 | "column_3": {
69 | "name": "column_3",
70 | "description": "",
71 | "meta": {},
72 | "data_type": null,
73 | "constraints": [],
74 | "quote": null,
75 | "tags": [],
76 | "granularity": null,
77 | "type": "unknown",
78 | "column_fqn": "dev.core.my_first_dbt_model.column_3",
79 | "table_fqn": "dev.core.my_first_dbt_model",
80 | "table_relative_fqn": "core.my_first_dbt_model",
81 | "transformations": [
82 | "'test-value' AS column_3",
83 | "'test-value' AS column_3",
84 | "'test-value' AS column_3",
85 | "'test-value' AS column_3",
86 | "source_data.column_3 AS column_3"
87 | ],
88 | "depends_on": []
89 | },
90 | "id": {
91 | "name": "id",
92 | "description": "The **primary key** for this table",
93 | "meta": {},
94 | "data_type": null,
95 | "constraints": [],
96 | "quote": null,
97 | "tags": [],
98 | "granularity": null,
99 | "type": "unknown",
100 | "column_fqn": "dev.core.my_first_dbt_model.id",
101 | "table_fqn": "dev.core.my_first_dbt_model",
102 | "table_relative_fqn": "core.my_first_dbt_model",
103 | "transformations": [
104 | "NULL AS id",
105 | "2 AS id",
106 | "1 AS id",
107 | "1 AS id",
108 | "source_data.id AS id"
109 | ],
110 | "depends_on": []
111 | }
112 | },
113 | "metadata": {}
114 | },
115 | "model.dbtcore.my_executesql_dbt_model": {
116 | "stats": {},
117 | "columns": {},
118 | "metadata": {}
119 | },
120 | "model.dbtcore.my_failing_dbt_model": {
121 | "stats": {},
122 | "columns": {
123 | "my_failing_column": {
124 | "name": "my_failing_column",
125 | "type": "unknown",
126 | "column_fqn": "dev.core.my_failing_dbt_model.my_failing_column",
127 | "table_fqn": "dev.core.my_failing_dbt_model",
128 | "table_relative_fqn": "core.my_failing_dbt_model",
129 | "transformations": [],
130 | "depends_on": []
131 | }
132 | },
133 | "metadata": {}
134 | },
135 | "model.dbtcore.my_core_table1": {
136 | "stats": {},
137 | "columns": {
138 | "id": {
139 | "name": "id",
140 | "description": "",
141 | "meta": {},
142 | "data_type": null,
143 | "constraints": [],
144 | "quote": null,
145 | "tags": [],
146 | "granularity": null,
147 | "type": "unknown",
148 | "column_fqn": "dev.core.my_core_table1.id",
149 | "table_fqn": "dev.core.my_core_table1",
150 | "table_relative_fqn": "core.my_core_table1",
151 | "transformations": [
152 | "2 AS id",
153 | "1 AS id",
154 | "source_data.id AS id"
155 | ],
156 | "depends_on": []
157 | },
158 | "row_data": {
159 | "name": "row_data",
160 | "description": "",
161 | "meta": {},
162 | "data_type": null,
163 | "constraints": [],
164 | "quote": null,
165 | "tags": [],
166 | "granularity": null,
167 | "type": "unknown",
168 | "column_fqn": "dev.core.my_core_table1.row_data",
169 | "table_fqn": "dev.core.my_core_table1",
170 | "table_relative_fqn": "core.my_core_table1",
171 | "transformations": [
172 | "'row1' AS row_data",
173 | "'row1' AS row_data",
174 | "source_data.row_data AS row_data"
175 | ],
176 | "depends_on": []
177 | }
178 | },
179 | "metadata": {}
180 | },
181 | "model.dbtcore.my_second_dbt_model": {
182 | "stats": {},
183 | "columns": {
184 | "pk_id": {
185 | "name": "pk_id",
186 | "description": "The primary key for this table",
187 | "meta": {},
188 | "data_type": null,
189 | "constraints": [],
190 | "quote": null,
191 | "tags": [],
192 | "granularity": null,
193 | "type": "unknown",
194 | "column_fqn": "dev.core.my_second_dbt_model.pk_id",
195 | "table_fqn": "dev.core.my_second_dbt_model",
196 | "table_relative_fqn": "core.my_second_dbt_model",
197 | "transformations": [
198 | "dev.core.my_first_dbt_model AS t1",
199 | "t1.id AS pk_id"
200 | ],
201 | "depends_on": [
202 | {
203 | "name": "id",
204 | "type": "unknown",
205 | "column_fqn": "dev.core.my_first_dbt_model.id",
206 | "table_fqn": "dev.core.my_first_dbt_model",
207 | "table_relative_fqn": "core.my_first_dbt_model",
208 | "transformations": [],
209 | "depends_on": [],
210 | "model_id": "model.dbtcore.my_first_dbt_model"
211 | }
212 | ]
213 | },
214 | "data_value1": {
215 | "name": "data_value1",
216 | "description": "",
217 | "meta": {},
218 | "data_type": null,
219 | "constraints": [],
220 | "quote": null,
221 | "tags": [],
222 | "granularity": null,
223 | "type": "unknown",
224 | "column_fqn": "dev.core.my_second_dbt_model.data_value1",
225 | "table_fqn": "dev.core.my_second_dbt_model",
226 | "table_relative_fqn": "core.my_second_dbt_model",
227 | "transformations": [
228 | "dev.core.my_first_dbt_model AS t1",
229 | "t1.data_value AS data_value1"
230 | ],
231 | "depends_on": [
232 | {
233 | "name": "data_value",
234 | "type": "unknown",
235 | "column_fqn": "dev.core.my_first_dbt_model.data_value",
236 | "table_fqn": "dev.core.my_first_dbt_model",
237 | "table_relative_fqn": "core.my_first_dbt_model",
238 | "transformations": [],
239 | "depends_on": [],
240 | "model_id": "model.dbtcore.my_first_dbt_model"
241 | }
242 | ]
243 | },
244 | "data_value2": {
245 | "name": "data_value2",
246 | "description": "",
247 | "meta": {},
248 | "data_type": null,
249 | "constraints": [],
250 | "quote": null,
251 | "tags": [],
252 | "granularity": null,
253 | "type": "unknown",
254 | "column_fqn": "dev.core.my_second_dbt_model.data_value2",
255 | "table_fqn": "dev.core.my_second_dbt_model",
256 | "table_relative_fqn": "core.my_second_dbt_model",
257 | "transformations": [
258 | "dev.core.my_core_table1 AS t2",
259 | "dev.core.my_first_dbt_model AS t1",
260 | "dev.core.my_first_dbt_model AS t1",
261 | "CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2"
262 | ],
263 | "depends_on": [
264 | {
265 | "name": "data_value",
266 | "type": "unknown",
267 | "column_fqn": "dev.core.my_first_dbt_model.data_value",
268 | "table_fqn": "dev.core.my_first_dbt_model",
269 | "table_relative_fqn": "core.my_first_dbt_model",
270 | "transformations": [],
271 | "depends_on": [],
272 | "model_id": "model.dbtcore.my_first_dbt_model"
273 | },
274 | {
275 | "name": "column_3",
276 | "type": "unknown",
277 | "column_fqn": "dev.core.my_first_dbt_model.column_3",
278 | "table_fqn": "dev.core.my_first_dbt_model",
279 | "table_relative_fqn": "core.my_first_dbt_model",
280 | "transformations": [],
281 | "depends_on": [],
282 | "model_id": "model.dbtcore.my_first_dbt_model"
283 | },
284 | {
285 | "name": "row_data",
286 | "type": "unknown",
287 | "column_fqn": "dev.core.my_core_table1.row_data",
288 | "table_fqn": "dev.core.my_core_table1",
289 | "table_relative_fqn": "core.my_core_table1",
290 | "transformations": [],
291 | "depends_on": [],
292 | "model_id": "model.dbtcore.my_core_table1"
293 | }
294 | ]
295 | },
296 | "event_tstamp": {
297 | "name": "event_tstamp",
298 | "description": "",
299 | "meta": {},
300 | "data_type": null,
301 | "constraints": [],
302 | "quote": null,
303 | "tags": [],
304 | "granularity": null,
305 | "type": "unknown",
306 | "column_fqn": "dev.core.my_second_dbt_model.event_tstamp",
307 | "table_fqn": "dev.core.my_second_dbt_model",
308 | "table_relative_fqn": "core.my_second_dbt_model",
309 | "transformations": [
310 | "dev.core.my_executedlt_model AS t3",
311 | "t3.event_tstamp AS event_tstamp"
312 | ],
313 | "depends_on": [
314 | {
315 | "name": "event_tstamp",
316 | "type": "unknown",
317 | "column_fqn": "dev.core.my_executedlt_model.event_tstamp",
318 | "table_fqn": "dev.core.my_executedlt_model",
319 | "table_relative_fqn": "core.my_executedlt_model",
320 | "transformations": [],
321 | "depends_on": [],
322 | "model_id": "model.dbtcore.my_executedlt_model"
323 | }
324 | ]
325 | }
326 | },
327 | "metadata": {}
328 | },
329 | "model.dbtcore.my_executepython_dlt_model": {
330 | "stats": {},
331 | "columns": {},
332 | "metadata": {}
333 | },
334 | "model.dbtcore.my_executedlt_model": {
335 | "stats": {},
336 | "columns": {
337 | "event_id": {
338 | "name": "event_id",
339 | "description": "",
340 | "meta": {},
341 | "data_type": null,
342 | "constraints": [],
343 | "quote": null,
344 | "tags": [],
345 | "granularity": null,
346 | "type": "unknown",
347 | "column_fqn": "dev.core.my_executedlt_model.event_id",
348 | "table_fqn": "dev.core.my_executedlt_model",
349 | "table_relative_fqn": "core.my_executedlt_model",
350 | "transformations": [],
351 | "depends_on": []
352 | },
353 | "event_tstamp": {
354 | "name": "event_tstamp",
355 | "description": "",
356 | "meta": {},
357 | "data_type": null,
358 | "constraints": [],
359 | "quote": null,
360 | "tags": [],
361 | "granularity": null,
362 | "type": "unknown",
363 | "column_fqn": "dev.core.my_executedlt_model.event_tstamp",
364 | "table_fqn": "dev.core.my_executedlt_model",
365 | "table_relative_fqn": "core.my_executedlt_model",
366 | "transformations": [],
367 | "depends_on": []
368 | }
369 | },
370 | "metadata": {}
371 | },
372 | "model.dbtcore.my_executepython_model": {
373 | "stats": {},
374 | "columns": {
375 | "event_id": {
376 | "name": "event_id",
377 | "description": "",
378 | "meta": {},
379 | "data_type": null,
380 | "constraints": [],
381 | "quote": null,
382 | "tags": [],
383 | "granularity": null,
384 | "type": "unknown",
385 | "column_fqn": "dev.core.my_executepython_model.event_id",
386 | "table_fqn": "dev.core.my_executepython_model",
387 | "table_relative_fqn": "core.my_executepython_model",
388 | "transformations": [],
389 | "depends_on": []
390 | },
391 | "event_tstamp": {
392 | "name": "event_tstamp",
393 | "description": "",
394 | "meta": {},
395 | "data_type": null,
396 | "constraints": [],
397 | "quote": null,
398 | "tags": [],
399 | "granularity": null,
400 | "type": "unknown",
401 | "column_fqn": "dev.core.my_executepython_model.event_tstamp",
402 | "table_fqn": "dev.core.my_executepython_model",
403 | "table_relative_fqn": "core.my_executepython_model",
404 | "transformations": [],
405 | "depends_on": []
406 | }
407 | },
408 | "metadata": {}
409 | },
410 | "source.dbtfinance.core.my_executepython_model": {
411 | "stats": {},
412 | "columns": {},
413 | "metadata": {}
414 | },
415 | "source.dbtfinance.core.my_executepython_dlt_model": {
416 | "stats": {},
417 | "columns": {},
418 | "metadata": {}
419 | }
420 | },
421 | "sources": {},
422 | "errors": null
423 | }
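
The entries above give column-level lineage: each column carries its fully-qualified name, the SQL transformations it passes through, and the upstream columns it depends on. A minimal sketch of walking that structure, assuming a locally generated copy of this catalogl.json and that the models sit under a top-level "nodes" key as in the other dbt catalog artifacts (path below is hypothetical):

    import json
    from pathlib import Path

    # Hypothetical path; adjust to wherever `dbt docs generate` wrote catalogl.json.
    catalog = json.loads(Path("target/catalogl.json").read_text())

    node = catalog["nodes"]["model.dbtcore.my_second_dbt_model"]
    for column_name, column in node["columns"].items():
        upstream = [dep["column_fqn"] for dep in column.get("depends_on", [])]
        print(f"{column_name} <- {upstream or 'no upstream columns'}")
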
--------------------------------------------------------------------------------
/docs/opendbtdocs/run_info.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/run-results/v6.json",
4 | "dbt_version": "1.9.4",
5 | "generated_at": "2025-05-15T18:01:12.604966Z",
6 | "invocation_id": "03a957b9-3b54-4612-b157-2c69961dbcf9",
7 | "env": {}
8 | },
9 | "elapsed_time": 11.362421989440918,
10 | "args": {
11 | "warn_error_options": {
12 | "include": [],
13 | "exclude": []
14 | },
15 | "show": false,
16 | "which": "build",
17 | "state_modified_compare_vars": false,
18 | "export_saved_queries": false,
19 | "include_saved_query": false,
20 | "use_colors": true,
21 | "version_check": true,
22 | "log_format_file": "debug",
23 | "require_explicit_package_overrides_for_builtin_materializations": true,
24 | "log_level": "info",
25 | "source_freshness_run_project_hooks": false,
26 | "populate_cache": true,
27 | "defer": false,
28 | "select": [],
29 | "require_batched_execution_for_custom_microbatch_strategy": false,
30 | "print": true,
31 | "state_modified_compare_more_unrendered_values": false,
32 | "strict_mode": false,
33 | "require_yaml_configuration_for_mf_time_spines": false,
34 | "send_anonymous_usage_stats": true,
35 | "log_file_max_bytes": 10485760,
36 | "exclude": [],
37 | "resource_types": [],
38 | "log_format": "default",
39 | "partial_parse_file_diff": true,
40 | "write_json": true,
41 | "invocation_command": "dbt test_dbt_docs.py::TestDbtDocs::test_run_docs_generate",
42 | "quiet": false,
43 | "target": "dev",
44 | "vars": {},
45 | "favor_state": false,
46 | "log_path": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore/logs",
47 | "macro_debugging": false,
48 | "exclude_resource_types": [],
49 | "require_nested_cumulative_type_params": false,
50 | "require_resource_names_without_spaces": false,
51 | "static_parser": true,
52 | "show_resource_report": false,
53 | "printer_width": 80,
54 | "introspect": true,
55 | "cache_selected_only": false,
56 | "log_level_file": "debug",
57 | "skip_nodes_if_on_run_start_fails": false,
58 | "profiles_dir": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore",
59 | "project_dir": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore",
60 | "use_colors_file": true,
61 | "empty": false,
62 | "partial_parse": true,
63 | "indirect_selection": "eager"
64 | },
65 | "nodes": {
66 | "model.dbtcore.my_core_table1": {
67 | "run_status": "success",
68 | "run_completed_at": "2025-05-15 18:01:01",
69 | "run_message": "OK",
70 | "run_failures": null,
71 | "run_adapter_response": {
72 | "_message": "OK"
73 | }
74 | },
75 | "model.dbtcore.my_executedlt_model": {
76 | "run_status": "success",
77 | "run_completed_at": "2025-05-15 18:01:09",
78 | "run_message": "Executed DLT pipeline",
79 | "run_failures": null,
80 | "run_adapter_response": {
81 | "_message": "Executed DLT pipeline",
82 | "code": "import dlt\nfrom dlt.pipeline import TPipeline\n\n\n@dlt.resource(\n columns={\"event_tstamp\": {\"data_type\": \"timestamp\", \"precision\": 3}},\n primary_key=\"event_id\",\n)\ndef events():\n yield [{\"event_id\": 1, \"event_tstamp\": \"2024-07-30T10:00:00.123\"},\n {\"event_id\": 2, \"event_tstamp\": \"2025-02-30T10:00:00.321\"}]\n\n\ndef model(dbt, pipeline: TPipeline):\n \"\"\"\n\n :param dbt:\n :param pipeline: Pre-configured dlt pipeline. dlt target connection and dataset is pre-set using the model config!\n :return:\n \"\"\"\n dbt.config(materialized=\"executedlt\")\n print(\"========================================================\")\n print(f\"INFO: DLT Pipeline pipeline_name:{pipeline.pipeline_name}\")\n print(f\"INFO: DLT Pipeline dataset_name:{pipeline.dataset_name}\")\n print(f\"INFO: DLT Pipeline dataset_name:{pipeline}\")\n print(f\"INFO: DLT Pipeline staging:{pipeline.staging}\")\n print(f\"INFO: DLT Pipeline destination:{pipeline.destination}\")\n print(f\"INFO: DLT Pipeline _pipeline_storage:{pipeline._pipeline_storage}\")\n print(f\"INFO: DLT Pipeline _schema_storage:{pipeline._schema_storage}\")\n print(f\"INFO: DLT Pipeline state:{pipeline.state}\")\n print(f\"INFO: DBT this:{dbt.this}\")\n print(\"========================================================\")\n load_info = pipeline.run(events(), table_name=str(str(dbt.this).split('.')[-1]).strip('\"'))\n print(load_info)\n row_counts = pipeline.last_trace.last_normalize_info\n print(row_counts)\n print(\"========================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executedlt_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executedlt_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n",
83 | "rows_affected": -1
84 | }
85 | },
86 | "model.dbtcore.my_executepython_dlt_model": {
87 | "run_status": "success",
88 | "run_completed_at": "2025-05-15 18:01:12",
89 | "run_message": "Executed Python",
90 | "run_failures": null,
91 | "run_adapter_response": {
92 | "_message": "Executed Python",
93 | "code": "import dlt\n\n\n@dlt.resource(\n columns={\"event_tstamp\": {\"data_type\": \"timestamp\", \"precision\": 3}},\n primary_key=\"event_id\",\n)\ndef events():\n yield [{\"event_id\": 1, \"event_tstamp\": \"2024-07-30T10:00:00.123\"},\n {\"event_id\": 2, \"event_tstamp\": \"2025-02-30T10:00:00.321\"}]\n\n\ndef model(dbt, session):\n dbt.config(materialized=\"executepython\")\n print(\"========================================================\")\n print(f\"INFO: DLT Version:{dlt.version.__version__}\")\n print(f\"INFO: DBT Duckdb Session:{type(session)}\")\n print(f\"INFO: DBT Duckdb Connection:{type(session._env.conn)}\")\n print(\"========================================================\")\n p = dlt.pipeline(\n pipeline_name=\"dbt_dlt\",\n destination=dlt.destinations.duckdb(session._env.conn),\n dataset_name=dbt.this.schema,\n dev_mode=False,\n )\n load_info = p.run(events())\n print(load_info)\n row_counts = p.last_trace.last_normalize_info\n print(row_counts)\n print(\"========================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executepython_dlt_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executepython_dlt_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n",
94 | "rows_affected": -1
95 | }
96 | },
97 | "model.dbtcore.my_executepython_model": {
98 | "run_status": "success",
99 | "run_completed_at": "2025-05-15 18:01:12",
100 | "run_message": "Executed Python",
101 | "run_failures": null,
102 | "run_adapter_response": {
103 | "_message": "Executed Python",
104 | "code": "import os\nimport platform\n\nfrom dbt import version\n\n\ndef print_info():\n _str = f\"name:{os.name}, system:{platform.system()} release:{platform.release()}\"\n _str += f\"\\npython version:{platform.python_version()}, dbt:{version.__version__}\"\n print(_str)\n\n\ndef model(dbt, session):\n dbt.config(materialized=\"executepython\")\n print(\"==================================================\")\n print(\"========IM LOCALLY EXECUTED PYTHON MODEL==========\")\n print(\"==================================================\")\n print_info()\n print(\"==================================================\")\n print(\"===============MAKE DBT GREAT AGAIN===============\")\n print(\"==================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executepython_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executepython_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n",
105 | "rows_affected": -1
106 | }
107 | },
108 | "model.dbtcore.my_executesql_dbt_model": {
109 | "run_status": "success",
110 | "run_completed_at": "2025-05-15 18:01:12",
111 | "run_message": "OK",
112 | "run_failures": null,
113 | "run_adapter_response": {
114 | "_message": "OK"
115 | }
116 | },
117 | "model.dbtcore.my_first_dbt_model": {
118 | "run_status": "success",
119 | "run_completed_at": "2025-05-15 18:01:12",
120 | "run_message": "OK",
121 | "run_failures": null,
122 | "run_adapter_response": {
123 | "_message": "OK"
124 | }
125 | },
126 | "test.dbtcore.not_null_my_first_dbt_model_id.5fb22c2710": {
127 | "run_status": "warn",
128 | "run_completed_at": "2025-05-15 18:01:12",
129 | "run_message": "Got 1 result, configured to warn if >0",
130 | "run_failures": 1,
131 | "run_adapter_response": {
132 | "_message": "OK"
133 | }
134 | },
135 | "test.dbtcore.unique_my_first_dbt_model_id.16e066b321": {
136 | "run_status": "warn",
137 | "run_completed_at": "2025-05-15 18:01:12",
138 | "run_message": "Got 1 result, configured to warn if >0",
139 | "run_failures": 1,
140 | "run_adapter_response": {
141 | "_message": "OK"
142 | }
143 | },
144 | "model.dbtcore.my_failing_dbt_model": {
145 | "run_status": "error",
146 | "run_completed_at": "2025-05-15 18:01:12",
147 | "run_message": "Runtime Error in model my_failing_dbt_model (models/my_failing_dbt_model.sql)\n Binder Error: Referenced column \"non_exists_column\" not found in FROM clause!\n Candidate bindings: \"id\"\n \n LINE 11: select non_exists_column as my_failing_column\n ^",
148 | "run_failures": null,
149 | "run_adapter_response": {}
150 | },
151 | "model.dbtcore.my_second_dbt_model": {
152 | "run_status": "success",
153 | "run_completed_at": "2025-05-15 18:01:12",
154 | "run_message": "OK",
155 | "run_failures": null,
156 | "run_adapter_response": {
157 | "_message": "OK"
158 | }
159 | },
160 | "test.dbtcore.not_null_my_second_dbt_model_pk_id.b08c51696a": {
161 | "run_status": "pass",
162 | "run_completed_at": "2025-05-15 18:01:12",
163 | "run_message": null,
164 | "run_failures": 0,
165 | "run_adapter_response": {
166 | "_message": "OK"
167 | }
168 | },
169 | "test.dbtcore.unique_my_second_dbt_model_pk_id.b8b65b2a4f": {
170 | "run_status": "fail",
171 | "run_completed_at": "2025-05-15 18:01:12",
172 | "run_message": "Got 1 result, configured to fail if != 0",
173 | "run_failures": 1,
174 | "run_adapter_response": {
175 | "_message": "OK"
176 | }
177 | }
178 | }
179 | }
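
run_info.json augments the usual run-results metadata with a per-node map of run status, completion time, message, and adapter response, which the documentation page reads to display run information. A minimal sketch of consuming it, assuming a locally generated copy (the path is hypothetical):

    import json
    from pathlib import Path

    # Hypothetical path; run_info.json is published next to the other docs artifacts.
    run_info = json.loads(Path("target/run_info.json").read_text())

    # List every node whose last run did not succeed, together with its message.
    for node_id, info in run_info["nodes"].items():
        if info["run_status"] != "success":
            print(node_id, info["run_status"], info.get("run_message"))
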
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: OpenDbt
2 | site_url: http://memiiso.github.io/opendbt
3 | repo_url: https://github.com/memiiso/opendbt
4 | theme:
5 | name: material
6 | features:
7 | # - navigation.instant
8 | - navigation.indexes
9 | - navigation.tabs
10 | # - navigation.expand
11 | - toc.integrate
12 | - content.code.copy
13 | - content.tabs.link
14 | nav:
15 | - Home: index.md
16 | - Examples: examples.md
17 | - Catalog: catalog.md
18 | - Catalog (Demo): opendbtdocs/index.html
19 |
20 | markdown_extensions:
21 | - pymdownx.highlight:
22 | anchor_linenums: true
23 | line_spans: __span
24 | pygments_lang_class: true
25 | - pymdownx.inlinehilite
26 | - pymdownx.snippets
27 | - pymdownx.superfences
28 | - abbr
29 | - pymdownx.snippets:
30 | base_path: [ !relative $config_dir ]
31 | check_paths: true
32 | - attr_list
33 | - pymdownx.emoji:
34 | emoji_index: !!python/name:material.extensions.emoji.twemoji
35 | emoji_generator: !!python/name:material.extensions.emoji.to_svg
--------------------------------------------------------------------------------
/opendbt/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from typing import List, Callable, Optional
4 |
5 | # IMPORTANT! this will import the overrides, and activates the patches
6 | from opendbt.dbt import *
7 | from opendbt.logger import OpenDbtLogger
8 | from opendbt.utils import Utils
9 |
10 |
11 | class OpenDbtCli:
12 | def __init__(self, project_dir: Path, profiles_dir: Optional[Path] = None, callbacks: Optional[List[Callable]] = None):
13 | self.project_dir: Path = Path(get_nearest_project_dir(project_dir.as_posix()))
14 | self.profiles_dir: Optional[Path] = profiles_dir if profiles_dir else default_profiles_dir()
15 | self._project: Optional[PartialProject] = None
16 | self._user_callbacks: List[Callable] = callbacks if callbacks else []
17 | self._project_callbacks: List[Callable] = []
18 |
19 | @property
20 | def project(self) -> PartialProject:
21 | if not self._project:
22 | self._project = PartialProject.from_project_root(project_root=self.project_dir.as_posix(),
23 | verify_version=True)
24 |
25 | return self._project
26 |
27 | @property
28 | def project_dict(self) -> dict:
29 | return self.project.project_dict
30 |
31 | @property
32 | def project_vars(self) -> dict:
33 | """
34 | :return: dict: Variables defined in the `dbt_project.yml` file, `vars`.
35 | Note:
36 | This method only retrieves global project variables specified within the `dbt_project.yml` file.
37 | Variables passed via command-line arguments are not included in the returned dictionary.
38 | """
39 | return self.project_dict.get('vars', {})
40 |
41 |
42 | @property
43 | def project_callbacks(self) -> List[Callable]:
44 | if not self._project_callbacks:
45 | self._project_callbacks = list(self._user_callbacks)
46 | dbt_callbacks_str = self.project_vars.get('dbt_callbacks', "")
47 | dbt_callbacks_list = [c for c in dbt_callbacks_str.split(',') if c.strip()]
48 | for callback_module_name in dbt_callbacks_list:
49 | callback_func = Utils.import_module_attribute_by_name(callback_module_name.strip())
50 | self._project_callbacks.append(callback_func)
51 |
52 | return self._project_callbacks
53 |
54 | def invoke(self, args: List[str], callbacks: Optional[List[Callable]] = None) -> dbtRunnerResult:
55 | """
56 | Run dbt with the given arguments.
57 |
58 | :param args: The arguments to pass to dbt.
59 |         :param callbacks: Optional extra callbacks to run in addition to the project-level callbacks.
60 | :return: The result of the dbt run.
61 | """
62 |         run_callbacks = self.project_callbacks + (callbacks if callbacks else [])
63 | run_args = args or []
64 | if "--project-dir" not in run_args:
65 | run_args += ["--project-dir", self.project_dir.as_posix()]
66 | if "--profiles-dir" not in run_args and self.profiles_dir:
67 | run_args += ["--profiles-dir", self.profiles_dir.as_posix()]
68 | return self.run(args=run_args, callbacks=run_callbacks)
69 |
70 | @staticmethod
71 | def run(args: List[str], callbacks: Optional[List[Callable]] = None) -> dbtRunnerResult:
72 | """
73 | Run dbt with the given arguments.
74 |
75 |         :param callbacks: Callback functions passed to the dbt runner and invoked on dbt events.
76 | :param args: The arguments to pass to dbt.
77 | :return: The result of the dbt run.
78 | """
79 | callbacks = callbacks if callbacks else []
80 | # https://docs.getdbt.com/reference/programmatic-invocations
81 | runner = DbtCliRunner(callbacks=callbacks)
82 | result: dbtRunnerResult = runner.invoke(args)
83 |
84 | if result.success:
85 | return result
86 |
87 | if result.exception:
88 | raise result.exception
89 |
90 | # take error message and raise it as exception
91 | err_messages = [res.message for res in result.result if isinstance(res, RunResult) and res.status == 'error']
92 |
93 | if err_messages:
94 | raise DbtRuntimeError(msg="\n".join(err_messages))
95 |
96 |         raise DbtRuntimeError(msg="DBT execution failed!")
97 |
98 | def manifest(self, partial_parse: bool = True, no_write_manifest: bool = True) -> Manifest:
99 | args = ["parse"]
100 | if partial_parse:
101 | args.append("--partial-parse")
102 | if no_write_manifest:
103 | args.append("--no-write-json")
104 |
105 | result = self.invoke(args=args)
106 | if not result.success:
107 | raise Exception(f"DBT execution failed. result:{result}")
108 | if isinstance(result.result, Manifest):
109 | return result.result
110 |
111 | raise Exception(f"DBT execution did not return Manifest object. returned:{type(result.result)}")
112 |
113 | def generate_docs(self, args: Optional[List[str]] = None):
114 | _args = ["docs", "generate"] + (args if args else [])
115 | self.invoke(args=_args)
116 |
117 |
118 | class OpenDbtProject(OpenDbtLogger):
119 | """
120 | This class is used to take action on a dbt project.
121 | """
122 |
123 | DEFAULT_TARGET = 'dev' # development
124 |
125 | def __init__(self, project_dir: Path, target: Optional[str] = None, profiles_dir: Optional[Path] = None, args: Optional[List[str]] = None, callbacks: Optional[List[Callable]] = None):
126 | super().__init__()
127 | self.project_dir: Path = project_dir
128 | self.profiles_dir: Optional[Path] = profiles_dir
129 | self.target: str = target if target else self.DEFAULT_TARGET
130 | self.args: List[str] = args if args else []
131 | self.cli: OpenDbtCli = OpenDbtCli(project_dir=self.project_dir, profiles_dir=self.profiles_dir, callbacks=callbacks)
132 |
133 | @property
134 | def project(self) -> PartialProject:
135 | return self.cli.project
136 |
137 | @property
138 | def project_dict(self) -> dict:
139 | return self.cli.project_dict
140 |
141 | @property
142 | def project_vars(self) -> dict:
143 | return self.cli.project_vars
144 |
145 | def run(self, command: str = "build", target: Optional[str] = None, args: Optional[List[str]] = None, use_subprocess: bool = False,
146 | write_json: bool = False) -> Optional[dbtRunnerResult]:
147 | run_args = args if args else []
148 | run_args.extend(["--target", target if target else self.target])
149 | run_args.extend(["--project-dir", self.project_dir.as_posix()])
150 | if self.profiles_dir:
151 | run_args.extend(["--profiles-dir", self.profiles_dir.as_posix()])
152 | run_args = [command] + run_args + self.args
153 |         if write_json and "--no-write-json" in run_args:
154 |             run_args.remove("--no-write-json")
155 |
156 | if use_subprocess:
157 | shell = False
158 | self.log.info(f"Working dir: {os.getcwd()}")
159 | py_executable = sys.executable if sys.executable else 'python'
160 | self.log.info(f"Python executable: {py_executable}")
161 | __command = [py_executable, '-m', 'opendbt'] + run_args
162 | self.log.info(f"Running command (shell={shell}) `{' '.join(__command)}`")
163 | Utils.runcommand(command=__command)
164 | return None
165 |
166 | self.log.info(f"Running `dbt {' '.join(run_args)}`")
167 | return self.cli.invoke(args=run_args)
168 |
169 | def manifest(self, partial_parse: bool = True, no_write_manifest: bool = True) -> Manifest:
170 | return self.cli.manifest(partial_parse=partial_parse, no_write_manifest=no_write_manifest)
171 |
172 | def generate_docs(self, args: Optional[List[str]] = None):
173 | return self.cli.generate_docs(args=args)
174 |
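
A minimal sketch of driving these classes programmatically, assuming a dbt project with a valid profiles.yml (the project path below is hypothetical):

    from pathlib import Path

    from opendbt import OpenDbtProject

    project = OpenDbtProject(project_dir=Path("tests/resources/dbtcore"), target="dev")

    # Run `dbt build`; returns a dbtRunnerResult, or None when use_subprocess=True.
    project.run(command="build")

    # Parse the project and inspect the manifest without writing artifacts.
    manifest = project.manifest()
    print(len(manifest.nodes), "nodes parsed")
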
--------------------------------------------------------------------------------
/opendbt/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 |
4 | from opendbt import OpenDbtCli, default_project_dir, default_profiles_dir
5 |
6 |
7 | def main():
8 | parser = argparse.ArgumentParser(description="OpenDBT CLI")
9 | parser.add_argument(
10 | "--project-dir",
11 | default=None,
12 | help="Path to the dbt project directory. Defaults to the DBT_PROJECT_DIR environment variable or the current working directory.",
13 | )
14 | parser.add_argument(
15 | "--profiles-dir",
16 | default=None,
17 | help="Path to the dbt profiles directory. Defaults to the DBT_PROFILES_DIR environment variable.",
18 | )
19 | ns, args = parser.parse_known_args()
20 | project_dir = Path(ns.project_dir) if ns.project_dir else default_project_dir()
21 | profiles_dir = Path(ns.profiles_dir) if ns.profiles_dir else default_profiles_dir()
22 |
23 | OpenDbtCli(project_dir=project_dir, profiles_dir=profiles_dir).invoke(args=args)
24 |
25 |
26 | if __name__ == "__main__":
27 | main()
28 |
--------------------------------------------------------------------------------
/opendbt/airflow/__init__.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from pathlib import Path
3 | from typing import Tuple
4 |
5 | from airflow import DAG
6 | from airflow.models.baseoperator import BaseOperator
7 | from airflow.operators.empty import EmptyOperator
8 |
9 | import opendbt
10 |
11 |
12 | class OpenDbtExecutorOperator(BaseOperator):
13 | """
14 | An Airflow operator for executing dbt commands.
15 | """
16 |
17 | def __init__(self,
18 | project_dir: Path,
19 | command: str,
20 | target: str = None,
21 | profiles_dir: Path = None,
22 | select: str = None,
23 | args: list = None,
24 | # without using subprocess airflow randomly gets deadlock
25 | use_subprocess: bool = True,
26 | execution_timeout=timedelta(minutes=60), **kwargs) -> None:
27 | super().__init__(execution_timeout=execution_timeout, **kwargs)
28 |
29 | self.project_dir: Path = project_dir
30 | self.command = command
31 | self.profiles_dir: Path = profiles_dir
32 | self.target = target
33 | self.use_subprocess = use_subprocess
34 | self.args = args if args else []
35 |
36 | if select:
37 | self.args += ["--select", select]
38 |
39 | # use separate colour for test and other executions
40 | if self.command == "test":
41 | self.ui_color = "#1CB1C2"
42 | else:
43 | self.ui_color = "#0084ff"
44 |
45 | def execute(self, context):
46 | """
47 | Execute the dbt command.
48 | """
49 | runner = opendbt.OpenDbtProject(project_dir=self.project_dir,
50 | profiles_dir=self.profiles_dir,
51 | target=self.target)
52 | runner.run(command=self.command, args=self.args, use_subprocess=self.use_subprocess)
53 |
54 |
55 | # pylint: disable=too-many-locals, too-many-branches
56 | class OpenDbtAirflowProject(opendbt.OpenDbtProject):
57 |
58 | def load_dbt_tasks(self,
59 | dag: DAG,
60 | start_node: BaseOperator = None,
61 | end_node: BaseOperator = None,
62 | tag: str = None,
63 | resource_type="all",
64 | include_dbt_seeds=False,
65 | include_singular_tests=False) -> Tuple[BaseOperator, BaseOperator]:
66 | """
67 |         Add dbt tasks of this project to the given DAG.
68 |
69 | Parameters:
70 | dag (DAG): The Airflow DAG object where the dbt tasks will be added.
71 | start_node (BaseOperator, optional): The starting node of the DAG. If not provided, an EmptyOperator will be used.
72 | end_node (BaseOperator, optional): The ending node of the DAG. If not provided, an EmptyOperator will be used.
73 | tag (str, optional): The tag to filter the dbt tasks. If provided, only tasks with this tag will be added to the DAG.
74 | resource_type (str, optional): The type of dbt resource to run. It can be "all", "model", or "test". Default is "all".
75 |             include_dbt_seeds (bool, optional): Whether to run `dbt seed` before all other dbt tasks. Default is False.
76 |             include_singular_tests (bool, optional): Whether to run singular tests as a final task before the end node. Default is False.
77 | Returns:
78 | Tuple[BaseOperator, BaseOperator]: The start and end nodes of the DAG after adding the dbt tasks.
79 | """
80 |
81 | start_node = start_node if start_node else EmptyOperator(task_id='dbt-%s-start' % self.project_dir.name,
82 | dag=dag)
83 | end_node = end_node if end_node else EmptyOperator(task_id='dbt-%s-end' % self.project_dir.name, dag=dag)
84 |
85 | if include_dbt_seeds:
86 |             # add a dbt seed task after the start node and before all other dbt tasks
87 | first_node = start_node
88 | start_node = OpenDbtExecutorOperator(dag=dag,
89 | task_id="dbt-seeds",
90 | project_dir=self.project_dir,
91 | profiles_dir=self.profiles_dir,
92 | target=self.target,
93 | command="seed"
94 | )
95 | start_node.set_upstream(first_node)
96 |
97 | manifest = self.manifest()
98 | dbt_tasks = {}
99 |         # create all the tasks, one task per dbt node (model/test)
100 | for key, node in manifest.nodes.items():
101 | if tag and tag not in node.tags:
102 |                 self.log.debug(
103 |                     f"Skipping node:{node.name} because it does not have the desired tag. "
104 |                     f"desired-tag={tag} node-tags={node.tags}")
105 |                 continue  # skip nodes that don't have the desired tag
106 |
107 | if resource_type == "test" and not str(node.name).startswith("source_"):
108 | if node.resource_type == "test":
109 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag,
110 | task_id=node.unique_id,
111 | project_dir=self.project_dir,
112 | profiles_dir=self.profiles_dir,
113 | target=self.target,
114 | command="test",
115 | select=node.name
116 | )
117 | if node.resource_type == "model":
118 | dbt_tasks[node.unique_id] = EmptyOperator(dag=dag, task_id=node.unique_id)
119 |
120 | if node.resource_type == "model" and resource_type in ["all", "model"]:
121 |                 # NOTE: the `build` command also runs the tests, that's why model tests are skipped below
122 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag,
123 | task_id=node.unique_id,
124 | project_dir=self.project_dir,
125 | profiles_dir=self.profiles_dir,
126 | target=self.target,
127 | command="build",
128 | select=node.name
129 | )
130 |
131 | if node.resource_type == "test" and str(node.name).startswith("source_") and resource_type in ["all",
132 | "test"]:
133 | # we are skipping model tests because they are included above with model execution( `build` command)
134 | # source table tests
135 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag,
136 | task_id=node.unique_id,
137 | project_dir=self.project_dir,
138 | profiles_dir=self.profiles_dir,
139 | target=self.target,
140 | command="test",
141 | select=node.name
142 | )
143 |
144 | # set upstream dependencies using dbt dependencies
145 | for key, node in manifest.nodes.items():
146 | if tag and tag not in node.tags:
147 |                 continue  # skip nodes that don't have the desired tag
148 |             if node.unique_id in dbt_tasks:
149 | task = dbt_tasks[node.unique_id]
150 | if node.depends_on_nodes:
151 | for upstream_id in node.depends_on_nodes:
152 | if upstream_id in dbt_tasks:
153 | self.log.debug(f"Setting upstream of {task.task_id} -> {upstream_id}")
154 | task.set_upstream(dbt_tasks[upstream_id])
155 |
156 | singular_tests = None
157 | if include_singular_tests:
158 | singular_tests = OpenDbtExecutorOperator(dag=dag,
159 | task_id=f"{self.project_dir.name}_singular_tests",
160 | project_dir=self.project_dir,
161 | profiles_dir=self.profiles_dir,
162 | target=self.target,
163 | command="test",
164 | select="test_type:singular"
165 | )
166 | for k, task in dbt_tasks.items():
167 | if not task.downstream_task_ids:
168 | # set downstream dependencies for the end nodes.
169 | self.log.debug(f"Setting downstream of {task.task_id} -> {end_node.task_id}")
170 |
171 | if include_singular_tests and singular_tests:
172 | task.set_downstream(singular_tests)
173 | else:
174 | task.set_downstream(end_node)
175 |
176 | if not task.upstream_task_ids:
177 | # set upstream dependencies for the nodes which don't have upstream dependency
178 |                 self.log.debug(f"Setting upstream of {task.task_id} -> {start_node.task_id}")
179 | task.set_upstream(start_node)
180 |
181 | if include_singular_tests:
182 | singular_tests.set_downstream(end_node)
183 | return start_node, end_node
184 |
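
A minimal DAG sketch using the operator and project class above, assuming Airflow 2.x and a dbt project mounted at /opt/dbt/dbtcore (both hypothetical):

    from datetime import datetime
    from pathlib import Path

    from airflow import DAG

    from opendbt.airflow import OpenDbtAirflowProject

    with DAG(dag_id="dbt_workflow", start_date=datetime(2024, 1, 1),
             schedule_interval=None, catchup=False) as dag:
        project = OpenDbtAirflowProject(project_dir=Path("/opt/dbt/dbtcore"),
                                        profiles_dir=Path("/opt/dbt/dbtcore"),
                                        target="dev")
        # One task per dbt node, wired together from the manifest dependency graph.
        project.load_dbt_tasks(dag=dag)
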
--------------------------------------------------------------------------------
/opendbt/airflow/plugin.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
4 | # pylint: disable=inconsistent-return-statements
5 | def init_plugins_dbtdocs_page(dbt_docs_dir: Path):
6 | from airflow.plugins_manager import AirflowPlugin
7 | from flask import Blueprint
8 | from flask_appbuilder import BaseView, expose
9 | from flask import abort
10 | from airflow.www.auth import has_access
11 | from airflow.security import permissions
12 |
13 | class DBTDocsView(BaseView):
14 | route_base = "/dbt"
15 | default_view = "dbt_docs_index"
16 |
17 | @expose("/dbt_docs_index.html") # type: ignore[misc]
18 | @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
19 | def dbt_docs_index(self):
20 | if not dbt_docs_dir.joinpath("index.html").is_file():
21 | abort(404)
22 | else:
23 | return dbt_docs_dir.joinpath("index.html").read_text()
24 | # return self.render_template("index.html", content="")
25 |
26 | def return_json(self, json_file: str):
27 | if not dbt_docs_dir.joinpath(json_file).is_file():
28 | abort(404)
29 | else:
30 | data = dbt_docs_dir.joinpath(json_file).read_text()
31 | return data, 200, {"Content-Type": "application/json"}
32 |
33 | @expose("/catalog.json") # type: ignore[misc]
34 | @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
35 | def catalog(self):
36 | return self.return_json("catalog.json")
37 |
38 | @expose("/manifest.json") # type: ignore[misc]
39 | @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
40 | def manifest(self):
41 | return self.return_json("manifest.json")
42 |
43 | @expose("/run_info.json") # type: ignore[misc]
44 | @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
45 | def run_info(self):
46 | return self.return_json("run_info.json")
47 |
48 | @expose("/catalogl.json") # type: ignore[misc]
49 | @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
50 | def catalogl(self):
51 | return self.return_json("catalogl.json")
52 |
53 |
54 | # Creating a flask blueprint to integrate the templates and static folder
55 | bp = Blueprint(
56 | "DBT Plugin",
57 | __name__,
58 | template_folder=dbt_docs_dir.as_posix(),
59 | static_folder=dbt_docs_dir.as_posix(),
60 | # static_url_path='/dbtdocsview'
61 | )
62 |
63 | class AirflowDbtDocsPlugin(AirflowPlugin):
64 | name = "DBT Docs Plugin"
65 | flask_blueprints = [bp]
66 | appbuilder_views = [{"name": "DBT Docs", "category": "", "view": DBTDocsView()}]
67 |
68 | return AirflowDbtDocsPlugin
69 |
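
Airflow registers plugins from module-level AirflowPlugin subclasses found in its plugins folder, so a plugins file only needs to expose the class returned by the factory above. A sketch, assuming the dbt docs artifacts live under /opt/dbt/docs (hypothetical path):

    from pathlib import Path

    from opendbt.airflow import plugin

    # Expose the plugin class at module level so Airflow's plugin manager picks it up.
    airflow_dbtdocs_page = plugin.init_plugins_dbtdocs_page(Path("/opt/dbt/docs"))
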
--------------------------------------------------------------------------------
/opendbt/dbt/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from dbt import version
4 | from packaging.version import Version
5 |
6 | from opendbt.runtime_patcher import RuntimePatcher
7 |
8 | OPENDBT_INDEX_HTML_FILE = Path(__file__).parent.joinpath('docs/index.html')
9 |
10 | try:
11 | # IMPORTANT! `opendbt.dbt` import needs to happen before any `dbt` import
12 | dbt_version = Version(version.get_installed_version().to_version_string(skip_matcher=True))
13 | if Version("1.7.0") <= dbt_version < Version("1.8.0"):
14 | RuntimePatcher(module_name="dbt.include.global_project").patch_attribute(attribute_name="DOCS_INDEX_FILE_PATH",
15 | new_value=OPENDBT_INDEX_HTML_FILE)
16 | from opendbt.dbt.v17.adapters.factory import OpenDbtAdapterContainer
17 | from opendbt.dbt.v17.task.docs.generate import OpenDbtGenerateTask
18 | from opendbt.dbt.v17.config.runtime import OpenDbtRuntimeConfig
19 | from opendbt.dbt.v17.task.run import OpenDbtModelRunner
20 | elif Version("1.8.0") <= dbt_version < Version("1.10.0"):
21 | RuntimePatcher(module_name="dbt.task.docs").patch_attribute(attribute_name="DOCS_INDEX_FILE_PATH",
22 | new_value=OPENDBT_INDEX_HTML_FILE)
23 | from opendbt.dbt.v18.adapters.factory import OpenDbtAdapterContainer
24 | from opendbt.dbt.v18.task.docs.generate import OpenDbtGenerateTask
25 | from opendbt.dbt.v18.config.runtime import OpenDbtRuntimeConfig
26 | from opendbt.dbt.v18.task.run import OpenDbtModelRunner
27 | from opendbt.dbt.v18.artifacts.schemas.run import OpenDbtRunResultsArtifact
28 | else:
29 | raise Exception(
30 | f"Unsupported dbt version {dbt_version}, please make sure dbt version is supported/integrated by opendbt")
31 |
32 | RuntimePatcher(module_name="dbt.adapters.factory").patch_attribute(attribute_name="FACTORY",
33 | new_value=OpenDbtAdapterContainer())
34 | # shared code patches
35 | from opendbt.dbt.shared.cli.main import sqlfluff
36 | from opendbt.dbt.shared.cli.main import sqlfluff_lint
37 | from opendbt.dbt.shared.cli.main import sqlfluff_fix
38 | from opendbt.dbt.shared.adapters.impl import OpenDbtBaseAdapter
39 |
40 | # dbt imports
41 | from dbt.cli.main import dbtRunner as DbtCliRunner
42 | from dbt.cli.main import dbtRunnerResult
43 | from dbt.cli.resolvers import default_profiles_dir, default_project_dir
44 | from dbt.config import PartialProject
45 | from dbt.contracts.graph.manifest import Manifest
46 | from dbt.contracts.results import RunResult
47 | from dbt.exceptions import DbtRuntimeError
48 | from dbt.task.base import get_nearest_project_dir
49 | except:
50 | raise
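
The version gate above swaps dbt internals (the docs index.html path, the adapter container, the generate/run tasks) for opendbt's subclasses at import time. Conceptually, patching a module attribute amounts to importing the module and replacing the name on it; a simplified sketch only, not the real implementation in opendbt/runtime_patcher.py:

    import importlib

    def patch_module_attribute(module_name: str, attribute_name: str, new_value) -> None:
        """Replace `module.attribute` with `new_value` for every later user of the module."""
        module = importlib.import_module(module_name)
        setattr(module, attribute_name, new_value)
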
--------------------------------------------------------------------------------
/opendbt/dbt/docs/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | catalog/
--------------------------------------------------------------------------------
/opendbt/dbt/shared/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/shared/adapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/adapters/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/shared/adapters/impl.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import sys
3 | import tempfile
4 | from typing import Dict
5 |
6 | from dbt.adapters.base import available, BaseAdapter
7 |
8 | from opendbt.runtime_patcher import PatchClass
9 |
10 |
11 | @PatchClass(module_name="dbt.adapters.base", target_name="BaseAdapter")
12 | class OpenDbtBaseAdapter(BaseAdapter):
13 |
14 | def _execute_python_model(self, model_name: str, compiled_code: str, **kwargs):
15 | try:
16 |             with tempfile.NamedTemporaryFile(suffix='.py', delete=True) as model_file:
17 | try:
18 | model_file.write(compiled_code.lstrip().encode('utf-8'))
19 | model_file.flush()
20 | print(f"Created temp py file {model_file.name}")
21 | # Load the module spec
22 | spec = importlib.util.spec_from_file_location(model_name, model_file.name)
23 | # Create a module object
24 | module = importlib.util.module_from_spec(spec)
25 | # Load the module
26 | sys.modules[model_name] = module
27 | spec.loader.exec_module(module)
28 | dbt_obj = module.dbtObj(None)
29 | # Access and call `model` function of the model!
30 | # IMPORTANT: here we are passing down duckdb session from the adapter to the model
31 | module.model(dbt=dbt_obj, **kwargs)
32 | except Exception as e:
33 | raise Exception(
34 |                         f"Failed to load or execute python model:{model_name} from file {model_file.name}") from e
35 | finally:
36 | model_file.close()
37 | except Exception as e:
38 | raise Exception(f"Failed to create temp py file for model:{model_name}") from e
39 |
40 | @available
41 | def submit_local_python_job(self, parsed_model: Dict, compiled_code: str):
42 | connection = self.connections.get_if_exists()
43 | if not connection:
44 | connection = self.connections.get_thread_connection()
45 | self._execute_python_model(model_name=parsed_model['name'], compiled_code=compiled_code,
46 | session=connection.handle)
47 |
48 | @available
49 | def submit_local_dlt_job(self, parsed_model: Dict, compiled_code: str):
50 | connection = self.connections.get_if_exists()
51 | if not connection:
52 | connection = self.connections.get_thread_connection()
53 |
54 | import dlt
55 | # IMPORTANT: here we are pre-configuring and preparing dlt.pipeline for the model!
56 | _pipeline = dlt.pipeline(
57 | pipeline_name=str(parsed_model['unique_id']).replace(".", "-"),
58 | destination=dlt.destinations.duckdb(connection.handle._env.conn),
59 | dataset_name=parsed_model['schema'],
60 | dev_mode=False,
61 | )
62 | self._execute_python_model(model_name=parsed_model['name'], compiled_code=compiled_code, pipeline=_pipeline)
63 |
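
These adapter hooks are what the `executepython` and `executedlt` materializations call: the compiled model code is written to a temporary file, imported, and its `model` function is invoked with the adapter's own connection. A minimal model using the `executepython` materialization, shortened from tests/resources/dbtcore/models/my_executepython_model.py:

    def model(dbt, session):
        # `session` is the adapter connection handle passed down by submit_local_python_job
        # (for dbt-duckdb this is the live DuckDB session).
        dbt.config(materialized="executepython")
        print("running inside the local dbt process")
        return None
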
--------------------------------------------------------------------------------
/opendbt/dbt/shared/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/cli/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/shared/cli/main.py:
--------------------------------------------------------------------------------
1 | import click
2 | from dbt.cli import requires, params as p
3 | from dbt.cli.main import global_flags, cli
4 |
5 | from opendbt.dbt.shared.task.sqlfluff import SqlFluffTasks
6 | from opendbt.runtime_patcher import PatchFunction
7 |
8 |
9 | # dbt sqlfluff
10 | @cli.group()
11 | @click.pass_context
12 | @global_flags
13 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff")
14 | def sqlfluff(ctx, **kwargs):
15 |     """Run sqlfluff lint and fix commands against the SQL models of your project"""
16 |
17 |
18 | # dbt sqlfluff lint
19 | @sqlfluff.command("lint")
20 | @click.pass_context
21 | @global_flags
22 | @p.defer
23 | @p.deprecated_defer
24 | @p.exclude
25 | @p.favor_state
26 | @p.deprecated_favor_state
27 | @p.full_refresh
28 | @p.indirect_selection
29 | @p.profile
30 | @p.profiles_dir
31 | @p.project_dir
32 | @p.resource_type
33 | @p.select
34 | @p.selector
35 | @p.show
36 | @p.state
37 | @p.defer_state
38 | @p.deprecated_state
39 | @p.store_failures
40 | @p.target
41 | @p.target_path
42 | @p.threads
43 | @p.vars
44 | @requires.postflight
45 | @requires.preflight
46 | @requires.profile
47 | @requires.project
48 | @requires.runtime_config
49 | @requires.manifest(write=False)
50 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_lint")
51 | def sqlfluff_lint(ctx, **kwargs):
52 |     """Lint the SQL files of your project with sqlfluff"""
53 | task = SqlFluffTasks(
54 | ctx.obj["flags"],
55 | ctx.obj["runtime_config"],
56 | ctx.obj["manifest"],
57 | )
58 |
59 | results = task.lint()
60 | success = task.interpret_results(results)
61 | return results, success
62 |
63 |
64 | # dbt sqlfluff fix
65 | @sqlfluff.command("fix")
66 | @click.pass_context
67 | @global_flags
68 | @p.defer
69 | @p.deprecated_defer
70 | @p.exclude
71 | @p.favor_state
72 | @p.deprecated_favor_state
73 | @p.full_refresh
74 | @p.indirect_selection
75 | @p.profile
76 | @p.profiles_dir
77 | @p.project_dir
78 | @p.resource_type
79 | @p.select
80 | @p.selector
81 | @p.show
82 | @p.state
83 | @p.defer_state
84 | @p.deprecated_state
85 | @p.store_failures
86 | @p.target
87 | @p.target_path
88 | @p.threads
89 | @p.vars
90 | @requires.postflight
91 | @requires.preflight
92 | @requires.profile
93 | @requires.project
94 | @requires.runtime_config
95 | @requires.manifest(write=False)
96 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_fix")
97 | def sqlfluff_fix(ctx, **kwargs):
98 |     """Fix sqlfluff lint violations in the SQL files of your project"""
99 | task = SqlFluffTasks(
100 | ctx.obj["flags"],
101 | ctx.obj["runtime_config"],
102 | ctx.obj["manifest"],
103 | )
104 |
105 | results = task.fix()
106 | success = task.interpret_results(results)
107 | return results, success
108 |
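
Because these commands extend the dbt CLI group, they can be driven like any other dbt command, either as `dbt sqlfluff lint` / `dbt sqlfluff fix` on the command line or programmatically. A sketch, assuming a project that ships a .sqlfluff configuration (the project path is hypothetical):

    from pathlib import Path

    from opendbt import OpenDbtCli

    cli = OpenDbtCli(project_dir=Path("tests/resources/dbtcore"))

    # Lint the project's SQL, then apply the automatic fixes.
    cli.invoke(args=["sqlfluff", "lint"])
    cli.invoke(args=["sqlfluff", "fix"])
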
--------------------------------------------------------------------------------
/opendbt/dbt/shared/task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/task/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/shared/task/sqlfluff.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from pathlib import Path
4 | from typing import Optional
5 |
6 | from dbt.config import RuntimeConfig
7 | from dbt.contracts.results import (
8 | CatalogResults,
9 | CatalogArtifact, RunExecutionResult,
10 | )
11 | from dbt.task.compile import CompileTask
12 | from sqlfluff.cli import commands
13 | from sqlfluff.core import Linter, FluffConfig
14 | from sqlfluff.core.linter import LintingResult
15 | from sqlfluff_templater_dbt import DbtTemplater
16 |
17 |
18 | class SqlFluffTasks(CompileTask):
19 |
20 | def __init__(self, args, config, manifest):
21 | super().__init__(args, config, manifest)
22 |
23 | self.sqlfluff_config = FluffConfig.from_path(path=self.config.project_root)
24 |
25 | templater_obj = self.sqlfluff_config._configs["core"]["templater_obj"]
26 | if isinstance(templater_obj, DbtTemplater):
27 | templater_obj: DbtTemplater
28 | self.config: RuntimeConfig
29 | templater_obj.project_root = self.config.project_root
30 | templater_obj.working_dir = self.config.project_root
31 | self.linter = Linter(self.sqlfluff_config)
32 |
33 | def get_result(self, elapsed_time: float, violations: list, num_violations: int):
34 | run_result = RunExecutionResult(
35 | results=[],
36 | elapsed_time=elapsed_time,
37 | generated_at=datetime.now(),
38 | # args=dbt.utils.args_to_dict(self.args),
39 | args={},
40 | )
41 | result = CatalogArtifact.from_results(
42 | nodes={},
43 | sources={},
44 | generated_at=datetime.now(),
45 | errors=violations if violations else None,
46 | compile_results=run_result,
47 | )
48 |         if num_violations > 0:
49 |             # record the linting failure on the result artifact
50 |             result.exception = Exception(f"Linting {num_violations} errors found!")
51 |
52 | return result
53 |
54 | def lint(self) -> CatalogArtifact:
55 | os.chdir(self.config.project_root)
56 | lint_result: LintingResult = self.linter.lint_paths(paths=(self.config.project_root,))
57 | result = self.get_result(lint_result.total_time, lint_result.get_violations(), lint_result.num_violations())
58 | if lint_result.num_violations() > 0:
59 | print(f"Linting {lint_result.num_violations()} errors found!")
60 | for error in lint_result.as_records():
61 | filepath = Path(error['filepath'])
62 | violations: list = error['violations']
63 | if violations:
64 | print(f"File: {filepath.relative_to(self.config.project_root)}")
65 | for violation in violations:
66 | print(f" {violation}")
67 | # print(f"Code:{violation['code']} Line:{violation['start_line_no']}, LinePos:{violation['start_line_pos']} {violation['description']}")
68 | return result
69 |
70 | def fix(self) -> CatalogArtifact:
71 | os.chdir(self.config.project_root)
72 | lnt, formatter = commands.get_linter_and_formatter(cfg=self.sqlfluff_config)
73 | lint_result: LintingResult = lnt.lint_paths(
74 | paths=(self.config.project_root,),
75 | fix=True,
76 | apply_fixes=True
77 | )
78 | result = self.get_result(lint_result.total_time, [], 0)
79 | return result
80 |
81 | @classmethod
82 |     def interpret_results(cls, results: Optional[CatalogResults]) -> bool:
83 | if results is None:
84 | return False
85 | if hasattr(results, "errors") and results.errors:
86 | return False
87 | return True
88 |
--------------------------------------------------------------------------------
/opendbt/dbt/v17/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v17/adapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/adapters/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v17/adapters/factory.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | from importlib import import_module
3 |
4 | from dbt.adapters import factory
5 | from dbt.adapters.factory import Adapter
6 | from dbt.events.functions import fire_event
7 | from dbt.events.types import AdapterRegistered
8 | from dbt.semver import VersionSpecifier
9 |
10 | from opendbt.runtime_patcher import PatchClass
11 |
12 |
13 | @PatchClass(module_name="dbt.adapters.factory", target_name="AdapterContainer")
14 | class OpenDbtAdapterContainer(factory.AdapterContainer):
15 | DBT_CUSTOM_ADAPTER_VAR = 'dbt_custom_adapter'
16 |
17 | def register_adapter(self, config: 'AdapterRequiredConfig') -> None:
18 | # ==== CUSTOM CODE ====
19 | # ==== END CUSTOM CODE ====
20 | adapter_name = config.credentials.type
21 | adapter_type = self.get_adapter_class_by_name(adapter_name)
22 | adapter_version = import_module(f".{adapter_name}.__version__", "dbt.adapters").version
23 | # ==== CUSTOM CODE ====
24 | custom_adapter_class_name: str = self.get_custom_adapter_config_value(config)
25 | if custom_adapter_class_name and custom_adapter_class_name.strip():
26 | # OVERRIDE DEFAULT ADAPTER BY USER GIVEN ADAPTER CLASS
27 | adapter_type = self.get_custom_adapter_class_by_name(custom_adapter_class_name)
28 | # ==== END CUSTOM CODE ====
29 | adapter_version_specifier = VersionSpecifier.from_version_string(
30 | adapter_version
31 | ).to_version_string()
32 | fire_event(
33 | AdapterRegistered(adapter_name=adapter_name, adapter_version=adapter_version_specifier)
34 | )
35 | with self.lock:
36 | if adapter_name in self.adapters:
37 | # this shouldn't really happen...
38 | return
39 |
40 | adapter: Adapter = adapter_type(config) # type: ignore
41 | self.adapters[adapter_name] = adapter
42 |
43 | def get_custom_adapter_config_value(self, config: 'AdapterRequiredConfig') -> str:
44 | # FIRST: it's set as cli value: dbt run --vars {'dbt_custom_adapter': 'custom_adapters.DuckDBAdapterV1Custom'}
45 | if hasattr(config, 'cli_vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.cli_vars:
46 | custom_adapter_class_name: str = config.cli_vars[self.DBT_CUSTOM_ADAPTER_VAR]
47 | if custom_adapter_class_name and custom_adapter_class_name.strip():
48 | return custom_adapter_class_name
49 | # SECOND: it's set inside dbt_project.yml
50 | if hasattr(config, 'vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.vars.to_dict():
51 | custom_adapter_class_name: str = config.vars.to_dict()[self.DBT_CUSTOM_ADAPTER_VAR]
52 | if custom_adapter_class_name and custom_adapter_class_name.strip():
53 | return custom_adapter_class_name
54 |
55 | return None
56 |
57 | def get_custom_adapter_class_by_name(self, custom_adapter_class_name: str):
58 | if "." not in custom_adapter_class_name:
59 |             raise ValueError(f"Unexpected adapter class name: `{custom_adapter_class_name}`, "
60 |                              f"expecting something like: `my.sample.library.MyAdapterClass`")
61 |
62 | __module, __class = custom_adapter_class_name.rsplit('.', 1)
63 | try:
64 | user_adapter_module = importlib.import_module(__module)
65 | user_adapter_class = getattr(user_adapter_module, __class)
66 | return user_adapter_class
67 | except ModuleNotFoundError as mnfe:
68 | raise Exception(f"Module of provided adapter not found, provided: {custom_adapter_class_name}") from mnfe
69 |
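
In practice the override is a user-defined class, importable on the Python path, whose dotted name is supplied through the `dbt_custom_adapter` variable (on the command line or in dbt_project.yml). A sketch, assuming dbt-duckdb is installed and exposes DuckDBAdapter at this import path (module name and class below are hypothetical):

    # my_package/adapters.py (hypothetical module)
    from dbt.adapters.duckdb import DuckDBAdapter


    class DuckDBAdapterCustom(DuckDBAdapter):
        """Example custom adapter picked up via the `dbt_custom_adapter` variable."""

        def custom_hello(self) -> str:
            return "hello from the custom adapter"

It would then be activated with, for example, `dbt run --vars '{"dbt_custom_adapter": "my_package.adapters.DuckDBAdapterCustom"}'`.
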
--------------------------------------------------------------------------------
/opendbt/dbt/v17/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/config/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v17/config/runtime.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from pathlib import Path
3 | from typing import Mapping
4 |
5 | from dbt.config import RuntimeConfig
6 | from dbt.config.project import path_exists, _load_yaml
7 | from dbt.constants import DEPENDENCIES_FILE_NAME
8 | from dbt.exceptions import DbtProjectError, NonUniquePackageNameError
9 | from typing_extensions import override
10 |
11 | from opendbt.runtime_patcher import PatchClass
12 |
13 | def load_yml_dict(file_path):
14 | ret = {}
15 | if path_exists(file_path):
16 | ret = _load_yaml(file_path) or {}
17 | return ret
18 |
19 | # pylint: disable=too-many-ancestors
20 | @dataclass
21 | @PatchClass(module_name="dbt.config", target_name="RuntimeConfig")
22 | @PatchClass(module_name="dbt.cli.requires", target_name="RuntimeConfig")
23 | class OpenDbtRuntimeConfig(RuntimeConfig):
24 | def load_dependence_projects(self):
25 | dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}")
26 |
27 | if "projects" not in dependencies_yml_dict:
28 | return
29 |
30 | projects = dependencies_yml_dict["projects"]
31 | project_root_parent = Path(self.project_root).parent
32 | for project in projects:
33 | path = project_root_parent.joinpath(project['name'])
34 | try:
35 | project = self.new_project(str(path.as_posix()))
36 | except DbtProjectError as e:
37 | raise DbtProjectError(
38 | f"Failed to read depending project: {e} \n project path:{path.as_posix()}",
39 | result_type="invalid_project",
40 | path=path,
41 | ) from e
42 |
43 | yield project.project_name, project
44 |
45 | @override
46 | def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]:
47 |         # load the standard dbt package dependencies first, then the opendbt project dependencies
48 | 
49 | if self.dependencies is None:
50 | # this sets self.dependencies variable!
51 | self.dependencies = super().load_dependencies(base_only=base_only)
52 |
53 | # additionally load `projects` defined in `dependencies.yml`
54 | for project_name, project in self.load_dependence_projects():
55 | if project_name in self.dependencies:
56 | raise NonUniquePackageNameError(project_name)
57 | self.dependencies[project_name] = project
58 |
59 | return self.dependencies
60 |
--------------------------------------------------------------------------------
/opendbt/dbt/v17/task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/task/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v17/task/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/task/docs/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v17/task/docs/generate.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 |
4 | import click
5 | from dbt.task.generate import GenerateTask, CATALOG_FILENAME, MANIFEST_FILE_NAME
6 |
7 | from opendbt.catalog import OpenDbtCatalog
8 | from opendbt.runtime_patcher import PatchClass
9 |
10 |
11 | @PatchClass(module_name="dbt.task.generate", target_name="GenerateTask")
12 | class OpenDbtGenerateTask(GenerateTask):
13 |
14 | def deploy_user_index_html(self):
15 |         # deploy the user-provided docs index.html, if one exists
16 |         target = Path(self.config.project_target_path).joinpath("index.html")
17 |         for docs_dir in self.config.docs_paths:
18 |             index_html = Path(self.config.project_root).joinpath(docs_dir).joinpath("index.html")
19 |             if index_html.is_file():
20 | # override default dbt provided index.html with user index.html file
21 | shutil.copyfile(index_html, target)
22 | click.echo(f"Using user provided documentation page: {index_html.as_posix()}")
23 | return
24 |
25 | def generate_opendbt_catalogl_json(self):
26 | catalog_path = Path(self.config.project_target_path).joinpath(CATALOG_FILENAME)
27 | manifest_path = Path(self.config.project_target_path).joinpath(MANIFEST_FILE_NAME)
28 | catalog = OpenDbtCatalog(manifest_path=manifest_path, catalog_path=catalog_path)
29 | catalog.export()
30 |
31 | def run(self):
32 | # Call the original dbt run method
33 | result = super().run()
34 | self.deploy_user_index_html()
35 | self.generate_opendbt_catalogl_json()
36 | return result
37 |
--------------------------------------------------------------------------------
/opendbt/dbt/v17/task/run.py:
--------------------------------------------------------------------------------
1 | from dbt.contracts.results import NodeStatus
2 | from dbt.events.base_types import EventLevel
3 | from dbt.events.functions import fire_event
4 | from dbt.events.types import (
5 | LogModelResult,
6 | )
7 | from dbt.task import run
8 |
9 | from opendbt.runtime_patcher import PatchClass
10 |
11 |
12 | @PatchClass(module_name="dbt.task.run", target_name="ModelRunner")
13 | class OpenDbtModelRunner(run.ModelRunner):
14 |
15 | def print_result_adapter_response(self, result):
16 | if hasattr(result, 'adapter_response') and result.adapter_response:
17 | if result.status == NodeStatus.Error:
18 | status = result.status
19 | level = EventLevel.ERROR
20 | else:
21 | status = result.message
22 | level = EventLevel.INFO
23 | fire_event(
24 | LogModelResult(
25 | description=str(result.adapter_response),
26 | status=status,
27 | index=self.node_index,
28 | total=self.num_nodes,
29 | execution_time=result.execution_time,
30 | node_info=self.node.node_info,
31 | ),
32 | level=level,
33 | )
34 |
35 | def print_result_line(self, result):
36 | super().print_result_line(result)
37 | self.print_result_adapter_response(result=result)
38 |
--------------------------------------------------------------------------------
/opendbt/dbt/v18/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/adapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/adapters/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/adapters/factory.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | from multiprocessing.context import SpawnContext
3 | from typing import Optional
4 |
5 | from dbt.adapters import factory
6 | from dbt.adapters.contracts.connection import AdapterRequiredConfig
7 | from dbt.adapters.events.types import (
8 | AdapterRegistered,
9 | )
10 | from dbt.adapters.factory import Adapter
11 | from dbt_common.events.base_types import EventLevel
12 | from dbt_common.events.functions import fire_event
13 |
14 | from opendbt.runtime_patcher import PatchClass
15 |
16 |
17 | @PatchClass(module_name="dbt.adapters.factory", target_name="AdapterContainer")
18 | class OpenDbtAdapterContainer(factory.AdapterContainer):
19 | DBT_CUSTOM_ADAPTER_VAR = 'dbt_custom_adapter'
20 |
21 | def register_adapter(
22 | self,
23 | config: 'AdapterRequiredConfig',
24 | mp_context: SpawnContext,
25 | adapter_registered_log_level: Optional[EventLevel] = EventLevel.INFO,
26 | ) -> None:
27 | adapter_name = config.credentials.type
28 | adapter_type = self.get_adapter_class_by_name(adapter_name)
29 | adapter_version = self._adapter_version(adapter_name)
30 | # ==== CUSTOM CODE ====
31 | custom_adapter_class_name: str = self.get_custom_adapter_config_value(config)
32 | if custom_adapter_class_name and custom_adapter_class_name.strip():
33 | # OVERRIDE DEFAULT ADAPTER BY USER GIVEN ADAPTER CLASS
34 | adapter_type = self.get_custom_adapter_class_by_name(custom_adapter_class_name)
35 | # ==== END CUSTOM CODE ====
36 | fire_event(
37 | AdapterRegistered(adapter_name=adapter_name, adapter_version=adapter_version),
38 | level=adapter_registered_log_level,
39 | )
40 | with self.lock:
41 | if adapter_name in self.adapters:
42 | # this shouldn't really happen...
43 | return
44 |
45 | adapter: Adapter = adapter_type(config, mp_context) # type: ignore
46 | self.adapters[adapter_name] = adapter
47 |
48 | def get_custom_adapter_config_value(self, config: 'AdapterRequiredConfig') -> str:
49 | # FIRST: it's set as cli value: dbt run --vars {'dbt_custom_adapter': 'custom_adapters.DuckDBAdapterV1Custom'}
50 | if hasattr(config, 'cli_vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.cli_vars:
51 | custom_adapter_class_name: str = config.cli_vars[self.DBT_CUSTOM_ADAPTER_VAR]
52 | if custom_adapter_class_name and custom_adapter_class_name.strip():
53 | return custom_adapter_class_name
54 | # SECOND: it's set inside dbt_project.yml
55 | if hasattr(config, 'vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.vars.to_dict():
56 | custom_adapter_class_name: str = config.vars.to_dict()[self.DBT_CUSTOM_ADAPTER_VAR]
57 | if custom_adapter_class_name and custom_adapter_class_name.strip():
58 | return custom_adapter_class_name
59 |
60 | return None
61 |
62 | def get_custom_adapter_class_by_name(self, custom_adapter_class_name: str):
63 | if "." not in custom_adapter_class_name:
64 |             raise ValueError(f"Unexpected adapter class name: `{custom_adapter_class_name}`, "
65 |                              f"expecting something like: `my.sample.library.MyAdapterClass`")
66 |
67 | __module, __class = custom_adapter_class_name.rsplit('.', 1)
68 | try:
69 | user_adapter_module = importlib.import_module(__module)
70 | user_adapter_class = getattr(user_adapter_module, __class)
71 | return user_adapter_class
72 | except ModuleNotFoundError as mnfe:
73 | raise Exception(f"Module of provided adapter not found, provided: {custom_adapter_class_name}") from mnfe
74 |
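
The `dbt_custom_adapter` variable handled above can be set in `dbt_project.yml` (as in `tests/resources/dbtcore/dbt_project.yml` later in this listing) or passed per invocation. A minimal sketch, assuming opendbt is installed and a dbt-duckdb project is available; the project path is illustrative:

from pathlib import Path

from opendbt import OpenDbtCli

# any dbt project directory with a matching profiles.yml (path is an assumption)
project_dir = Path("tests/resources/dbtcore")
client = OpenDbtCli(project_dir=project_dir, profiles_dir=project_dir)

# override the adapter class for this run only, mirroring the cli_vars branch above
client.invoke(args=["run", "--vars", "{'dbt_custom_adapter': 'opendbt.examples.DuckDBAdapterV2Custom'}"])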
--------------------------------------------------------------------------------
/opendbt/dbt/v18/artifacts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/artifacts/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/artifacts/schemas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/artifacts/schemas/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/artifacts/schemas/run.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 |
4 | from dbt.artifacts.schemas import run
5 | from filelock import FileLock, Timeout
6 |
7 | from opendbt.runtime_patcher import PatchClass
8 | from opendbt.utils import Utils
9 |
10 |
11 | # pylint: disable=too-many-ancestors
12 | @PatchClass(module_name="dbt.artifacts.schemas.run", target_name="RunResultsArtifact")
13 | @PatchClass(module_name="dbt.artifacts.schemas.run.v5.run", target_name="RunResultsArtifact")
14 | class OpenDbtRunResultsArtifact(run.RunResultsArtifact):
15 |
16 | def run_info(self) -> dict:
17 | run_info_data: dict = self.to_dict(omit_none=False)
18 | nodes = {}
19 | for r in self.results:
20 | key = r.unique_id
21 | execute_entry = next((item for item in r.timing if item.name == "execute"), None)
22 | run_completed_at = execute_entry.completed_at if execute_entry else None
23 | nodes[key] = {}
24 | nodes[key]['run_status'] = r.status
25 |             nodes[key]['run_completed_at'] = run_completed_at.strftime("%Y-%m-%d %H:%M:%S") if run_completed_at else None
26 | nodes[key]['run_message'] = r.message
27 | nodes[key]['run_failures'] = r.failures
28 | nodes[key]['run_adapter_response'] = r.adapter_response
29 |
30 | # pylint: disable=unsupported-assignment-operation
31 | run_info_data['nodes'] = nodes
32 | run_info_data.pop('results', None)
33 | return run_info_data
34 |
35 | def write_run_info(self, path: str):
36 | run_info_file = Path(path).parent.joinpath("run_info.json")
37 | command = self.args.get('which', "NONE")
38 | if command not in ['run', 'build', 'test']:
39 | return
40 |
41 | lock_file = run_info_file.with_suffix(".json.lock") # Use a distinct lock file extension
42 | data = {}
43 | try:
44 |             # Acquire the lock (wait up to 10 seconds)
45 | lock = FileLock(lock_file, timeout=10)
46 | with lock:
47 | if run_info_file.exists() and run_info_file.stat().st_size > 0:
48 | try:
49 | with open(run_info_file, 'r', encoding='utf-8') as f:
50 | data = json.load(f)
51 | # Ensure it's a dictionary before merging
52 | if not isinstance(data, dict):
53 | print(f"Warning: Content of {run_info_file} is not a JSON object. Overwriting.")
54 | data = {}
55 | except json.JSONDecodeError:
56 | print(f"Warning: Could not decode JSON from {run_info_file}. Overwriting.")
57 | except Exception as e:
58 | print(f"Error reading {run_info_file}: {e}. Starting fresh.")
59 |
60 | new_data = self.run_info()
61 | data = Utils.merge_dicts(data, new_data)
62 |
63 | try:
64 | with open(run_info_file, 'w', encoding='utf-8') as f:
65 | json.dump(data, f)
66 | except Exception as e:
67 | print(f"Error writing merged data to {run_info_file}: {e}")
68 |
69 | except Timeout:
70 | print(
71 | f"Error: Could not acquire lock on {lock_file} within 10 seconds. Skipping update for {run_info_file}.")
72 | except Exception as e:
73 | # Catch other potential errors during locking or file operations
74 | print(f"An unexpected error occurred processing {run_info_file}: {e}")
75 |
76 | def write(self, path: str):
77 | super().write(path)
78 | self.write_run_info(path=path)
79 |
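
After `run`, `build`, or `test`, `write_run_info` above merges a `run_info.json` file next to `run_results.json` in the target directory. A small sketch for reading it back; the `target/` path is an assumption for a default dbt project:

import json
from pathlib import Path

run_info_file = Path("target/run_info.json")  # assumed default target path
if run_info_file.exists():
    run_info = json.loads(run_info_file.read_text(encoding="utf-8"))
    # per-node entries are keyed by unique_id, as built in run_info() above
    for unique_id, node in run_info.get("nodes", {}).items():
        print(unique_id, node.get("run_status"), node.get("run_completed_at"))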
--------------------------------------------------------------------------------
/opendbt/dbt/v18/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/config/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/config/runtime.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from pathlib import Path
3 | from typing import Mapping
4 |
5 | from dbt.config import RuntimeConfig
6 | from dbt.config.project import load_yml_dict
7 | from dbt.constants import DEPENDENCIES_FILE_NAME
8 | from dbt.exceptions import DbtProjectError, NonUniquePackageNameError
9 | from typing_extensions import override
10 |
11 | from opendbt.runtime_patcher import PatchClass
12 |
13 | # pylint: disable=too-many-ancestors
14 | @dataclass
15 | @PatchClass(module_name="dbt.config", target_name="RuntimeConfig")
16 | @PatchClass(module_name="dbt.cli.requires", target_name="RuntimeConfig")
17 | class OpenDbtRuntimeConfig(RuntimeConfig):
18 | def load_dependence_projects(self):
19 | dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}")
20 |
21 | if "projects" not in dependencies_yml_dict:
22 | return
23 |
24 | projects = dependencies_yml_dict["projects"]
25 | project_root_parent = Path(self.project_root).parent
26 | for project in projects:
27 | path = project_root_parent.joinpath(project['name'])
28 | try:
29 | project = self.new_project(str(path.as_posix()))
30 | except DbtProjectError as e:
31 | raise DbtProjectError(
32 | f"Failed to read depending project: {e} \n project path:{path.as_posix()}",
33 | result_type="invalid_project",
34 | path=path,
35 | ) from e
36 |
37 | yield project.project_name, project
38 |
39 | @override
40 | def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]:
41 |         # first load the standard dbt package dependencies
42 |
43 | if self.dependencies is None:
44 | # this sets self.dependencies variable!
45 | self.dependencies = super().load_dependencies(base_only=base_only)
46 |
47 | # additionally load `projects` defined in `dependencies.yml`
48 | for project_name, project in self.load_dependence_projects():
49 | if project_name in self.dependencies:
50 | raise NonUniquePackageNameError(project_name)
51 | self.dependencies[project_name] = project
52 |
53 | return self.dependencies
54 |
--------------------------------------------------------------------------------
/opendbt/dbt/v18/task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/task/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/task/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/task/docs/__init__.py
--------------------------------------------------------------------------------
/opendbt/dbt/v18/task/docs/generate.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 |
4 | import click
5 | from dbt.task.docs.generate import GenerateTask, CATALOG_FILENAME, MANIFEST_FILE_NAME
6 |
7 | from opendbt.catalog import OpenDbtCatalog
8 | from opendbt.runtime_patcher import PatchClass
9 |
10 |
11 | @PatchClass(module_name="dbt.task.docs.generate", target_name="GenerateTask")
12 | class OpenDbtGenerateTask(GenerateTask):
13 |
14 | def deploy_user_index_html(self):
15 |         # deploy the user-provided docs index.html, if one exists
16 |         target = Path(self.config.project_target_path).joinpath("index.html")
17 |         for docs_dir in self.config.docs_paths:
18 |             index_html = Path(self.config.project_root).joinpath(docs_dir).joinpath("index.html")
19 |             if index_html.is_file():
20 | # override default dbt provided index.html with user index.html file
21 | shutil.copyfile(index_html, target)
22 | click.echo(f"Using user provided documentation page: {index_html.as_posix()}")
23 | return
24 |
25 | def generate_opendbt_catalogl_json(self):
26 | catalog_path = Path(self.config.project_target_path).joinpath(CATALOG_FILENAME)
27 | manifest_path = Path(self.config.project_target_path).joinpath(MANIFEST_FILE_NAME)
28 | catalog = OpenDbtCatalog(manifest_path=manifest_path, catalog_path=catalog_path)
29 | catalog.export()
30 |
31 | def run(self):
32 | # Call the original dbt run method
33 | result = super().run()
34 | self.deploy_user_index_html()
35 | self.generate_opendbt_catalogl_json()
36 | return result
37 |
--------------------------------------------------------------------------------
/opendbt/dbt/v18/task/run.py:
--------------------------------------------------------------------------------
1 | from dbt.artifacts.schemas.results import NodeStatus
2 | from dbt.events.types import (
3 | LogModelResult,
4 | )
5 | from dbt.task import run
6 | from dbt_common.events.base_types import EventLevel
7 | from dbt_common.events.functions import fire_event
8 |
9 | from opendbt.runtime_patcher import PatchClass
10 |
11 |
12 | @PatchClass(module_name="dbt.task.run", target_name="ModelRunner")
13 | class OpenDbtModelRunner(run.ModelRunner):
14 |
15 | def print_result_adapter_response(self, result):
16 | if hasattr(result, 'adapter_response') and result.adapter_response:
17 | if result.status == NodeStatus.Error:
18 | status = result.status
19 | level = EventLevel.ERROR
20 | else:
21 | status = result.message
22 | level = EventLevel.INFO
23 | fire_event(
24 | LogModelResult(
25 | description=str(result.adapter_response),
26 | status=status,
27 | index=self.node_index,
28 | total=self.num_nodes,
29 | execution_time=result.execution_time,
30 | node_info=self.node.node_info,
31 | ),
32 | level=level,
33 | )
34 |
35 | def print_result_line(self, result):
36 | super().print_result_line(result)
37 | self.print_result_adapter_response(result=result)
38 |
--------------------------------------------------------------------------------
/opendbt/examples.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from multiprocessing.context import SpawnContext
3 |
4 | from dbt.adapters.duckdb import DuckDBAdapter
5 |
6 |
7 | class DuckDBAdapterV2Custom(DuckDBAdapter):
8 | pass
9 |
10 | # NOTE! used for testing
11 | class DuckDBAdapterTestingOnlyDbt17(DuckDBAdapter):
12 | def __init__(self, config) -> None:
13 | print(f"WARNING: Using User Provided DBT Adapter: {type(self).__module__}.{type(self).__name__}")
14 | # pylint: disable=no-value-for-parameter
15 | super().__init__(config=config)
16 | raise Exception("Custom user defined test adapter activated, test exception")
17 |
18 |
19 | # NOTE! used for testing
20 | class DuckDBAdapterTestingOnlyDbt18(DuckDBAdapter):
21 | def __init__(self, config, mp_context: SpawnContext) -> None:
22 | print(f"WARNING: Using User Provided DBT Adapter: {type(self).__module__}.{type(self).__name__}")
23 | super().__init__(config=config, mp_context=mp_context)
24 | raise Exception("Custom user defined test adapter activated, test exception")
25 |
26 |
27 | def email_dbt_test_callback(event: "EventMsg"):
28 | if event.info.name == "LogTestResult" and event.info.level in ["warn", "error"]:
29 | logging.getLogger('dbtcallbacks').warning("DBT callback `email_dbt_test_callback` called!")
30 | email_subject = f"[DBT] test {event.info.level} raised"
31 | email_html_content = f"""Following test raised {event.info.level}!
32 | dbt msg: {event.info.msg}
33 | dbt test: {event.data.name}
34 | dbt node_relation: {event.data.node_info.node_relation}
35 | --------------- full data ---------------
36 | dbt data: {event.data}
37 | """
38 | # @TODO send email alert using airflow
39 | # from airflow.utils.email import send_email
40 | # send_email(
41 | # subject=email_subject,
42 | # to="my-slack-notification-channel@slack.com",
43 | # html_content=email_html_content
44 | # )
45 | logging.getLogger('dbtcallbacks').error("Callback email sent!")
46 |
--------------------------------------------------------------------------------
/opendbt/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 |
4 |
5 | class OpenDbtLogger:
6 | _log = None
7 |
8 | @property
9 | def log(self) -> logging.Logger:
10 | if self._log is None:
11 | self._log = logging.getLogger(name="opendbt")
12 | if not self._log.hasHandlers():
13 | handler = logging.StreamHandler(sys.stdout)
14 | formatter = logging.Formatter("[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s")
15 | handler.setFormatter(formatter)
16 | handler.setLevel(logging.INFO)
17 | self._log.addHandler(handler)
18 | return self._log
--------------------------------------------------------------------------------
/opendbt/macros/executedlt.sql:
--------------------------------------------------------------------------------
1 | {% materialization executedlt, supported_languages=['python']%}
2 |
3 | {%- set identifier = model['alias'] -%}
4 | {%- set language = model['language'] -%}
5 |
6 | {% set grant_config = config.get('grants') %}
7 |
8 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
9 | {%- set target_relation = api.Relation.create(identifier=identifier,
10 | schema=schema,
11 | database=database, type='table') -%}
12 | {{ run_hooks(pre_hooks) }}
13 |
14 | {% call noop_statement(name='main', message='Executed DLT pipeline', code=compiled_code, rows_affected=-1, res=None) %}
15 | {%- set res = adapter.submit_local_dlt_job(model, compiled_code) -%}
16 | {% endcall %}
17 | {{ run_hooks(post_hooks) }}
18 |
19 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
20 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
21 |
22 | {% do persist_docs(target_relation, model) %}
23 |
24 | {{ return({'relations': [target_relation]}) }}
25 |
26 | {% endmaterialization %}
27 |
--------------------------------------------------------------------------------
/opendbt/macros/executepython.sql:
--------------------------------------------------------------------------------
1 | {% materialization executepython, supported_languages=['python']%}
2 |
3 | {%- set identifier = model['alias'] -%}
4 | {%- set language = model['language'] -%}
5 |
6 | {% set grant_config = config.get('grants') %}
7 |
8 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
9 | {%- set target_relation = api.Relation.create(identifier=identifier,
10 | schema=schema,
11 | database=database, type='table') -%}
12 | {{ run_hooks(pre_hooks) }}
13 |
14 | {% call noop_statement(name='main', message='Executed Python', code=compiled_code, rows_affected=-1, res=None) %}
15 | {%- set res = adapter.submit_local_python_job(model, compiled_code) -%}
16 | {% endcall %}
17 | {{ run_hooks(post_hooks) }}
18 |
19 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
20 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
21 |
22 | {% do persist_docs(target_relation, model) %}
23 |
24 | {{ return({'relations': [target_relation]}) }}
25 |
26 | {% endmaterialization %}
27 |
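
The `executepython` materialization above hands the compiled model code to `adapter.submit_local_python_job` and only executes it locally. A hedged sketch of a Python model that could select it; the `model(dbt, session)` signature follows dbt's Python-model convention and the body is illustrative:

def model(dbt, session):
    # select the opendbt materialization defined by the macro above
    dbt.config(materialized="executepython")
    print("Hello! This code is executed locally by the executepython materialization.")
    # no relation is built from the return value; the materialization only runs the code
    return None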
--------------------------------------------------------------------------------
/opendbt/macros/executesql.sql:
--------------------------------------------------------------------------------
1 | {% materialization executesql, supported_languages=['sql']%}
2 |
3 | {#
4 | modified version of table materialization. it executes compiled sql statement as is.
5 | #}
6 |
7 | {%- set identifier = model['alias'] -%}
8 | {%- set language = model['language'] -%}
9 |
10 | {% set grant_config = config.get('grants') %}
11 |
12 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
13 | {%- set target_relation = api.Relation.create(identifier=identifier,
14 | schema=schema,
15 | database=database, type='table') -%}
16 |
17 | {{ run_hooks(pre_hooks) }}
18 |
19 | {{ log(msg="Executing SQL: " ~ compiled_code ~ "", info=True) }}
20 | {% call statement('main', language=language, fetch_result=False) -%}
21 | {{ compiled_code }}
22 | {%- endcall %}
23 |
24 | {%- set result = load_result('main') -%}
25 | {{ log(msg="Execution result " ~ result ~ "", info=True) }}
26 | {# DISABLED
27 | {%- set result_data = result['data'] -%}
28 | {{ log(msg="Execution result_data " ~ result_data ~ "", info=True) }}
29 | {%- set result_status = result['response'] -%}
30 | {{ log(msg="Execution result_status " ~ result_status ~ "", info=True) }}
31 | END-DISABLED #}
32 |
33 | {{ run_hooks(post_hooks) }}
34 |
35 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
36 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
37 |
38 | {% do persist_docs(target_relation, model) %}
39 |
40 | {{ return({'relations': [target_relation]}) }}
41 |
42 | {% endmaterialization %}
43 |
--------------------------------------------------------------------------------
/opendbt/runtime_patcher.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | from typing import Callable, Type, Any
3 | from opendbt.logger import OpenDbtLogger
4 |
5 |
6 | class RuntimePatcher(OpenDbtLogger):
7 | """
8 | A utility class for patching modules and classes at runtime.
9 |
10 | This class provides a simplified way to replace existing functions,
11 | classes, or attributes within modules with custom implementations.
12 | """
13 |
14 | def __init__(self, module_name: str):
15 | """
16 | Initializes the RuntimePatcher for a specific module.
17 |
18 | Args:
19 | module_name: The name of the module to patch (e.g., "dbt.config").
20 | """
21 | self.module_name = module_name
22 | self.module = importlib.import_module(module_name)
23 |
24 | def patch_function(self, function_name: str, new_function: Callable):
25 | """
26 | Patches a function within the module.
27 |
28 | Args:
29 | function_name: The name of the function to patch.
30 | new_function: The new function to use as a replacement.
31 | """
32 | setattr(self.module, function_name, new_function)
33 | self.log.debug(f"Patched function: {self.module_name}.{function_name}")
34 |
35 | def patch_class(self, class_name: str, new_class: Type):
36 | """
37 | Patches a class within the module.
38 |
39 | Args:
40 | class_name: The name of the class to patch.
41 | new_class: The new class to use as a replacement.
42 | """
43 | setattr(self.module, class_name, new_class)
44 | self.log.debug(f"Patched class: {self.module_name}.{class_name}")
45 |
46 | def patch_attribute(self, attribute_name: str, new_value: Any):
47 | """
48 | Patches an attribute within the module.
49 |
50 | Args:
51 | attribute_name: The name of the attribute to patch.
52 | new_value: The new value to assign to the attribute.
53 | """
54 | setattr(self.module, attribute_name, new_value)
55 | self.log.debug(f"Patched attribute: {self.module_name}.{attribute_name}")
56 |
57 | def patch_class_method(self, class_name: str, method_name: str, new_method: Callable):
58 | """
59 | Patches a class method within the module.
60 |
61 | Args:
62 | class_name: The name of the class containing the method.
63 | method_name: The name of the method to patch.
64 | new_method: The new method to use as a replacement.
65 | """
66 | target_class = getattr(self.module, class_name)
67 | setattr(target_class, method_name, new_method)
68 | self.log.debug(f"Patched class method: {self.module_name}.{class_name}.{method_name}")
69 |
70 |
71 | class _PatchDecorator:
72 | """
73 | Base class for patch decorators
74 | """
75 |
76 | def __init__(self, module_name: str, target_name: str):
77 | self.module_name = module_name
78 | self.target_name = target_name
79 | self.patcher = RuntimePatcher(self.module_name)
80 |
81 |
82 | class PatchClass(_PatchDecorator):
83 | """
84 | A decorator for patching classes at runtime.
85 | """
86 |
87 | def __call__(self, target: Type):
88 | self.patcher.patch_class(self.target_name, target)
89 | return target
90 |
91 |
92 | class PatchFunction(_PatchDecorator):
93 | """
94 | A decorator for patching functions at runtime.
95 | """
96 |
97 | def __call__(self, target: Callable):
98 | self.patcher.patch_function(self.target_name, target)
99 | return target
100 |
101 |
102 | class PatchAttribute(_PatchDecorator):
103 | """
104 | A decorator for patching attributes at runtime.
105 | """
106 |
107 | def __call__(self, target: Any):
108 | # if it is callable, call it to get the value
109 | if callable(target):
110 | target = target()
111 | self.patcher.patch_attribute(self.target_name, target)
112 | return target
113 |
114 |
115 | class PatchClassMethod(_PatchDecorator):
116 | """
117 | A decorator for patching class methods at runtime.
118 | """
119 |
120 | def __init__(self, module_name: str, class_name: str, method_name: str):
121 | super().__init__(module_name, class_name)
122 | self.method_name = method_name
123 |
124 | def __call__(self, target: Callable):
125 | self.patcher.patch_class_method(self.target_name, self.method_name, target)
126 | return target
127 |
128 | # Example Usage:
129 |
130 | # Example: use PatchClass to override the ModelRunner class
131 | # @PatchClass(module_name="dbt.task.run", target_name="ModelRunner")
132 | # class CustomModelRunner:
133 | # def __init__(self, *args, **kwargs):
134 | # print("Custom ModelRunner initialized!")
135 | #
136 | #
137 | # # Example to use PatchClass for override the RuntimeConfig class
138 | # # Example: use PatchClass to override the RuntimeConfig class
139 | # class CustomRuntimeConfig:
140 | # def __init__(self, *args, **kwargs):
141 | # print("Custom RuntimeConfig initialized!")
142 | #
143 | # # Example: use PatchAttribute to override the FACTORY attribute
144 | # @PatchAttribute(module_name="dbt.adapters.factory", target_name="FACTORY")
145 | # def get_custom_open_dbt_adapter_container():
146 | # class CustomOpenDbtAdapterContainer:
147 | # def __init__(self, *args, **kwargs):
148 | # print("Custom OpenDbtAdapterContainer initialized!")
149 | # return CustomOpenDbtAdapterContainer
150 | #
151 | #
152 | # # Example: use PatchFunction to override the sqlfluff_lint function
153 | # @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_lint")
154 | # def custom_sqlfluff_lint():
155 | # print("Custom sqlfluff_lint called!")
156 |
157 | # Example: use PatchClassMethod to patch a single class method
158 | # @PatchClassMethod(module_name="dbt.adapters.factory", class_name="AdapterContainer", method_name="get_adapter")
159 | # def custom_get_adapter(self, *args, **kwargs):
160 | # print("Custom get_adapter method called!")
161 | # return "Custom Adapter"
162 |
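
A runnable sketch of `PatchClass` against a harmless standard-library target; patching `json.JSONDecoder` is purely illustrative and not something opendbt itself does:

import json

from opendbt.runtime_patcher import PatchClass


@PatchClass(module_name="json", target_name="JSONDecoder")
class MyDecoder(json.JSONDecoder):
    """Behaves exactly like the stock decoder, but `json.JSONDecoder` now resolves to this class."""


# the patch is applied when the decorator runs, i.e. at class definition time
assert json.JSONDecoder is MyDecoder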
--------------------------------------------------------------------------------
/opendbt/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import subprocess
3 |
4 |
5 | class Utils:
6 |
7 | @staticmethod
8 | def runcommand(command: list, shell=False):
9 | with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1,
10 | universal_newlines=True, shell=shell) as p:
11 | for line in p.stdout:
12 | if line:
13 | print(line.strip())
14 |
15 |             if p.wait() != 0:  # wait for the process to exit before checking its return code
16 | raise subprocess.CalledProcessError(p.returncode, p.args)
17 |
18 | @staticmethod
19 | def import_module_attribute_by_name(module_name: str):
20 | if "." not in module_name:
21 |             raise ValueError(f"Unexpected module name: `{module_name}`, "
22 |                              f"expecting something like: `my.sample.library.MyClass` or `my.sample.library.my_method`")
23 |
24 | __module, __attribute = module_name.rsplit('.', 1)
25 | try:
26 | _adapter_module = importlib.import_module(__module)
27 | _adapter_attribute = getattr(_adapter_module, __attribute)
28 | return _adapter_attribute
29 | except ModuleNotFoundError as mnfe:
30 | raise Exception(f"Provided module not found, provided: {module_name}") from mnfe
31 |
32 | @staticmethod
33 | def merge_dicts(dict1: dict, dict2: dict) -> dict:
34 | """
35 |         Recursively merges dict2 into dict1; when a key exists in both, the dict1 value is kept unless it is empty.
36 | Returns:
37 | A new dictionary representing the merged result.
38 | """
39 | merged = dict1.copy()
40 |
41 | for key, value in dict2.items():
42 | if key in merged:
43 | # Check if both values are dictionary-like (mappings)
44 | if isinstance(merged[key], dict) and isinstance(value, dict):
45 | # Both are dicts, recurse
46 | merged[key] = Utils.merge_dicts(merged[key], value)
47 | else:
48 |                     # keep the dict1 value unless it is empty, otherwise take the dict2 value
49 | if not merged.get(key, None):
50 | merged[key] = value
51 | else:
52 | # Key not in dict1, simply add it
53 | merged[key] = value
54 |
55 | return merged
56 |
57 | @staticmethod
58 | def lowercase_dict_keys(input_dict: dict, recursive: bool = False):
59 | if not isinstance(input_dict, dict):
60 | return input_dict
61 |
62 | new_dict = {}
63 | for key, value in input_dict.items():
64 | if isinstance(value, dict) and recursive:
65 |                 value = Utils.lowercase_dict_keys(value, recursive=True)
66 | if isinstance(key, str):
67 | key = key.lower()
68 |
69 | new_dict[key] = value
70 |
71 | return new_dict
72 |
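
A quick sketch of the `Utils.merge_dicts` precedence rules used by `write_run_info` above: nested dicts are merged recursively, and values from the first dict win unless they are empty:

from opendbt.utils import Utils

existing = {"nodes": {"model.a": {"run_status": "success"}}, "args": {}}
incoming = {"nodes": {"model.b": {"run_status": "error"}}, "args": {"which": "run"}}

merged = Utils.merge_dicts(existing, incoming)
assert set(merged["nodes"]) == {"model.a", "model.b"}  # nested dicts merged recursively
assert merged["args"] == {"which": "run"}              # empty value in dict1 is replaced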
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "setuptools-scm"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "opendbt"
7 | version = "0.14.0"
8 | authors = [
9 | { name = "Memiiso Organization" },
10 | ]
11 | description = "opendbt: dbt-core with additional features"
12 | readme = "README.md"
13 | requires-python = ">=3.8"
14 | keywords = ["dbt-core", "dbt"]
15 | license = { text = "Apache License 2.0" }
16 | classifiers = [
17 | "Development Status :: 5 - Production/Stable",
18 | "Programming Language :: Python :: 3",
19 | ]
20 | dependencies = [
21 | "dbt-duckdb>=1.6",
22 | "sqlfluff",
23 | "sqlfluff-templater-dbt",
24 | "sqlglot",
25 | "tqdm",
26 | "filelock"
27 | ]
28 | [project.optional-dependencies]
29 | airflow = ["apache-airflow"]
30 | test = ["testcontainers>=4.10", "apache-airflow", "pylint", "dlt[duckdb]"]
31 | dev = ["mkdocs-material"]
32 |
33 | [tool.setuptools]
34 | include-package-data = true
35 | packages = ["opendbt"]
36 |
37 | [project.scripts]
38 | opendbt = "opendbt.__main__:main"
39 |
40 | [project.urls]
41 | Homepage = "https://github.com/memiiso/opendbt"
42 | Documentation = "https://github.com/memiiso/opendbt"
43 | Repository = "https://github.com/memiiso/opendbt"
44 |
--------------------------------------------------------------------------------
/tests/base_dbt_test.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from unittest import TestCase
3 |
4 | from dbt.version import __version__ as dbt_version_str
5 | from dbt.version import get_installed_version as get_dbt_version
6 |
7 | from opendbt import OpenDbtCli
8 |
9 |
10 | class BaseDbtTest(TestCase):
11 | TESTS_ROOT = Path(__file__).parent
12 | PROJECT_ROOT = TESTS_ROOT.parent
13 | RESOURCES_DIR = TESTS_ROOT.joinpath("resources")
14 | DBTCORE_DIR = RESOURCES_DIR.joinpath("dbtcore")
15 | DBTFINANCE_DIR = RESOURCES_DIR.joinpath("dbtfinance")
16 | DBT_VERSION = get_dbt_version()
17 | DBT_VERSION_STR = dbt_version_str
18 |
19 | @classmethod
20 | def setUpClass(cls):
21 | BaseDbtTest.PROJECT_ROOT.joinpath("dev.duckdb").unlink(missing_ok=True)
22 | BaseDbtTest.RESOURCES_DIR.joinpath("dev.duckdb").unlink(missing_ok=True)
23 |
24 | dpf = OpenDbtCli(project_dir=BaseDbtTest.DBTFINANCE_DIR, profiles_dir=BaseDbtTest.DBTFINANCE_DIR)
25 | dpc = OpenDbtCli(project_dir=BaseDbtTest.DBTCORE_DIR, profiles_dir=BaseDbtTest.DBTCORE_DIR)
26 | dpf.invoke(args=["clean"])
27 | dpc.invoke(args=["clean"])
28 |
29 | def setUp(self):
30 | # Setup actions to be performed before each test
31 | BaseDbtTest.PROJECT_ROOT.joinpath("dev.duckdb").unlink(missing_ok=True)
32 | BaseDbtTest.RESOURCES_DIR.joinpath("dev.duckdb").unlink(missing_ok=True)
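
A hedged sketch of a test built on `BaseDbtTest`; the class name and import path are illustrative, but the `OpenDbtCli(...).invoke(...)` call mirrors `setUpClass` above:

from opendbt import OpenDbtCli

from base_dbt_test import BaseDbtTest  # assumed import path, matching the tests layout


class MyDbtRunTest(BaseDbtTest):
    def test_run_dbtcore_project(self):
        dp = OpenDbtCli(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
        dp.invoke(args=["run"])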
--------------------------------------------------------------------------------
/tests/resources/airflow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:slim-2.5.2-python3.8
2 | LABEL authors="opendbt"
3 |
4 | # install additional packages
5 | COPY --chown=airflow:airflow opendbt /tmp/opendbt/opendbt
6 | COPY --chown=airflow:airflow README.md /tmp/opendbt/README.md
7 | COPY --chown=airflow:airflow pyproject.toml /tmp/opendbt/pyproject.toml
8 |
9 | RUN pip install dbt-core==1.8.*
10 | RUN pip install dbt-duckdb==1.8.*
11 | RUN pip install /tmp/opendbt/
12 | EXPOSE 8080
13 |
--------------------------------------------------------------------------------
/tests/resources/airflow/airflow/airflow.cfg:
--------------------------------------------------------------------------------
1 | # Default airflow config of the docker image
2 | [core]
3 | dags_folder = /opt/airflow/dags
4 | hostname_callable = airflow.utils.net.getfqdn
5 | default_timezone = utc
6 | executor = SequentialExecutor
7 | parallelism = 32
8 | max_active_tasks_per_dag = 16
9 | dags_are_paused_at_creation = True
10 | max_active_runs_per_dag = 16
11 | load_examples = True
12 | plugins_folder = /opt/airflow/plugins
13 | execute_tasks_new_python_interpreter = False
14 | fernet_key =
15 | donot_pickle = True
16 | dagbag_import_timeout = 30.0
17 | dagbag_import_error_tracebacks = True
18 | dagbag_import_error_traceback_depth = 2
19 | dag_file_processor_timeout = 50
20 | task_runner = StandardTaskRunner
21 | default_impersonation =
22 | security =
23 | unit_test_mode = False
24 | enable_xcom_pickling = False
25 | allowed_deserialization_classes = airflow\..*
26 | killed_task_cleanup_time = 60
27 | dag_run_conf_overrides_params = True
28 | dag_discovery_safe_mode = True
29 | dag_ignore_file_syntax = regexp
30 | default_task_retries = 0
31 | default_task_retry_delay = 300
32 | default_task_weight_rule = downstream
33 | default_task_execution_timeout =
34 | min_serialized_dag_update_interval = 30
35 | compress_serialized_dags = False
36 | min_serialized_dag_fetch_interval = 10
37 | max_num_rendered_ti_fields_per_task = 30
38 | check_slas = True
39 | xcom_backend = airflow.models.xcom.BaseXCom
40 | lazy_load_plugins = True
41 | lazy_discover_providers = True
42 | hide_sensitive_var_conn_fields = True
43 | sensitive_var_conn_names =
44 | default_pool_task_slot_count = 128
45 | max_map_length = 1024
46 | daemon_umask = 0o077
47 | [database]
48 | sql_alchemy_conn = sqlite:////opt/airflow/airflow.db
49 | sql_engine_encoding = utf-8
50 | sql_alchemy_pool_enabled = True
51 | sql_alchemy_pool_size = 5
52 | sql_alchemy_max_overflow = 10
53 | sql_alchemy_pool_recycle = 1800
54 | sql_alchemy_pool_pre_ping = True
55 | sql_alchemy_schema =
56 | load_default_connections = True
57 | max_db_retries = 3
58 | [logging]
59 | base_log_folder = /opt/airflow/logs
60 | remote_logging = False
61 | remote_log_conn_id =
62 | google_key_path =
63 | remote_base_log_folder =
64 | encrypt_s3_logs = False
65 | logging_level = INFO
66 | celery_logging_level =
67 | fab_logging_level = WARNING
68 | logging_config_class =
69 | colored_console_log = True
70 | colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
71 | colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
72 | log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
73 | simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
74 | dag_processor_log_target = file
75 | dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
76 | log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware
77 | task_log_prefix_template =
78 | log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log
79 | log_processor_filename_template = {{ filename }}.log
80 | dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log
81 | task_log_reader = task
82 | extra_logger_names =
83 | worker_log_server_port = 8793
84 | [metrics]
85 | statsd_on = False
86 | statsd_host = localhost
87 | statsd_port = 8125
88 | statsd_prefix = airflow
89 | statsd_allow_list =
90 | stat_name_handler =
91 | statsd_datadog_enabled = False
92 | statsd_datadog_tags =
93 |
94 | [secrets]
95 | # backend = airflow.providers.hashicorp.secrets.vault.VaultBackend
96 | # backend_kwargs = {"connections_path": "connections", "variables_path": "variables", "mount_point": "airflow", "url": "http://127.0.0.1:8200"}
97 | backend =
98 | backend_kwargs =
99 |
100 | [cli]
101 | api_client = airflow.api.client.local_client
102 | endpoint_url = http://localhost:8080
103 | [debug]
104 | fail_fast = False
105 | [api]
106 | enable_experimental_api = False
107 | auth_backends = airflow.api.auth.backend.session
108 | maximum_page_limit = 100
109 | fallback_page_limit = 100
110 | google_oauth2_audience =
111 | google_key_path =
112 | access_control_allow_headers =
113 | access_control_allow_methods =
114 | access_control_allow_origins =
115 | [lineage]
116 | backend =
117 | [atlas]
118 | sasl_enabled = False
119 | host =
120 | port = 21000
121 | username =
122 | password =
123 | [operators]
124 | default_owner = airflow
125 | default_cpus = 1
126 | default_ram = 512
127 | default_disk = 512
128 | default_gpus = 0
129 | default_queue = default
130 | allow_illegal_arguments = False
131 | [hive]
132 | default_hive_mapred_queue =
133 | [webserver]
134 | base_url = http://localhost:8080
135 | default_ui_timezone = UTC
136 | web_server_host = 0.0.0.0
137 | web_server_port = 8080
138 | web_server_ssl_cert =
139 | web_server_ssl_key =
140 | session_backend = database
141 | web_server_master_timeout = 120
142 | web_server_worker_timeout = 120
143 | worker_refresh_batch_size = 1
144 | worker_refresh_interval = 6000
145 | reload_on_plugin_change = False
146 | secret_key = KpWSnDmjuxdEAVePCn1T4Q==
147 | workers = 4
148 | worker_class = sync
149 | access_logfile = -
150 | error_logfile = -
151 | access_logformat =
152 | expose_config = False
153 | expose_hostname = False
154 | expose_stacktrace = False
155 | dag_default_view = grid
156 | dag_orientation = LR
157 | log_fetch_timeout_sec = 5
158 | log_fetch_delay_sec = 2
159 | log_auto_tailing_offset = 30
160 | log_animation_speed = 1000
161 | hide_paused_dags_by_default = False
162 | page_size = 100
163 | navbar_color = #fff
164 | default_dag_run_display_number = 25
165 | enable_proxy_fix = False
166 | proxy_fix_x_for = 1
167 | proxy_fix_x_proto = 1
168 | proxy_fix_x_host = 1
169 | proxy_fix_x_port = 1
170 | proxy_fix_x_prefix = 1
171 | cookie_secure = False
172 | cookie_samesite = Lax
173 | default_wrap = False
174 | x_frame_enabled = True
175 | show_recent_stats_for_completed_runs = True
176 | update_fab_perms = True
177 | session_lifetime_minutes = 43200
178 | instance_name_has_markup = False
179 | auto_refresh_interval = 3
180 | warn_deployment_exposure = True
181 | audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data
182 | [email]
183 | email_backend = airflow.utils.email.send_email_smtp
184 | email_conn_id = smtp_default
185 | default_email_on_retry = True
186 | default_email_on_failure = True
187 | [smtp]
188 | smtp_host = localhost
189 | smtp_starttls = True
190 | smtp_ssl = False
191 | smtp_port = 25
192 | smtp_mail_from = airflow@example.com
193 | smtp_timeout = 30
194 | smtp_retry_limit = 5
195 | [sentry]
196 | sentry_on = false
197 | sentry_dsn =
198 | [local_kubernetes_executor]
199 | kubernetes_queue = kubernetes
200 | [celery_kubernetes_executor]
201 | kubernetes_queue = kubernetes
202 | [celery]
203 | celery_app_name = airflow.executors.celery_executor
204 | worker_concurrency = 16
205 | worker_prefetch_multiplier = 1
206 | worker_enable_remote_control = true
207 | broker_url = redis://redis:6379/0
208 | flower_host = 0.0.0.0
209 | flower_url_prefix =
210 | flower_port = 5555
211 | flower_basic_auth =
212 | sync_parallelism = 0
213 | celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
214 | ssl_active = False
215 | ssl_key =
216 | ssl_cert =
217 | ssl_cacert =
218 | pool = prefork
219 | operation_timeout = 1.0
220 | task_track_started = True
221 | task_adoption_timeout = 600
222 | stalled_task_timeout = 0
223 | task_publish_max_retries = 3
224 | worker_precheck = False
225 | [celery_broker_transport_options]
226 | [dask]
227 | cluster_address = 127.0.0.1:8786
228 | tls_ca =
229 | tls_cert =
230 | tls_key =
231 | [scheduler]
232 | job_heartbeat_sec = 5
233 | scheduler_heartbeat_sec = 5
234 | num_runs = -1
235 | scheduler_idle_sleep_time = 1
236 | min_file_process_interval = 30
237 | parsing_cleanup_interval = 60
238 | dag_dir_list_interval = 300
239 | print_stats_interval = 30
240 | pool_metrics_interval = 5.0
241 | scheduler_health_check_threshold = 30
242 | enable_health_check = False
243 | scheduler_health_check_server_port = 8974
244 | orphaned_tasks_check_interval = 300.0
245 | child_process_log_directory = /opt/airflow/logs/scheduler
246 | scheduler_zombie_task_threshold = 300
247 | zombie_detection_interval = 10.0
248 | catchup_by_default = True
249 | ignore_first_depends_on_past_by_default = True
250 | max_tis_per_query = 512
251 | use_row_level_locking = True
252 | max_dagruns_to_create_per_loop = 10
253 | max_dagruns_per_loop_to_schedule = 20
254 | schedule_after_task_execution = True
255 | parsing_processes = 2
256 | file_parsing_sort_mode = modified_time
257 | standalone_dag_processor = False
258 | max_callbacks_per_loop = 20
259 | dag_stale_not_seen_duration = 600
260 | use_job_schedule = True
261 | allow_trigger_in_future = False
262 | trigger_timeout_check_interval = 15
263 | [triggerer]
264 | default_capacity = 1000
265 | [kerberos]
266 | ccache = /tmp/airflow_krb5_ccache
267 | principal = airflow
268 | reinit_frequency = 3600
269 | kinit_path = kinit
270 | keytab = airflow.keytab
271 | forwardable = True
272 | include_ip = True
273 | [elasticsearch]
274 | host =
275 | log_id_template = {dag_id}-{task_id}-{run_id}-{map_index}-{try_number}
276 | end_of_log_mark = end_of_log
277 | frontend =
278 | write_stdout = False
279 | json_format = False
280 | json_fields = asctime, filename, lineno, levelname, message
281 | host_field = host
282 | offset_field = offset
283 | [elasticsearch_configs]
284 | use_ssl = False
285 | verify_certs = True
286 | [kubernetes_executor]
287 | pod_template_file =
288 | worker_container_repository =
289 | worker_container_tag =
290 | namespace = default
291 | delete_worker_pods = True
292 | delete_worker_pods_on_failure = False
293 | worker_pods_creation_batch_size = 1
294 | multi_namespace_mode = False
295 | in_cluster = True
296 | kube_client_request_args =
297 | delete_option_kwargs =
298 | enable_tcp_keepalive = True
299 | tcp_keep_idle = 120
300 | tcp_keep_intvl = 30
301 | tcp_keep_cnt = 6
302 | verify_ssl = True
303 | worker_pods_pending_timeout = 300
304 | worker_pods_pending_timeout_check_interval = 120
305 | worker_pods_queued_check_interval = 60
306 | worker_pods_pending_timeout_batch_size = 100
307 | [sensors]
308 | default_timeout = 604800
309 |
--------------------------------------------------------------------------------
/tests/resources/airflow/airflow/webserver_config.py:
--------------------------------------------------------------------------------
1 | """Default configuration for the Airflow webserver."""
2 | from __future__ import annotations
3 |
4 | import os
5 |
6 | from flask_appbuilder.const import AUTH_DB
7 |
8 | # from airflow.www.fab_security.manager import AUTH_LDAP
9 | # from airflow.www.fab_security.manager import AUTH_OAUTH
10 | # from airflow.www.fab_security.manager import AUTH_OID
11 | # from airflow.www.fab_security.manager import AUTH_REMOTE_USER
12 |
13 |
14 | basedir = os.path.abspath(os.path.dirname(__file__))
15 |
16 | # Flask-WTF flag for CSRF
17 | WTF_CSRF_ENABLED = True
18 | WTF_CSRF_TIME_LIMIT = None
19 |
20 | # ----------------------------------------------------
21 | # AUTHENTICATION CONFIG
22 | # ----------------------------------------------------
23 | # For details on how to set up each of the following authentication, see
24 | # http://flask-appbuilder.readthedocs.io/en/latest/security.html#authentication-methods
25 | # for details.
26 |
27 | # The authentication type
28 | # AUTH_OID : Is for OpenID
29 | # AUTH_DB : Is for database
30 | # AUTH_LDAP : Is for LDAP
31 | # AUTH_REMOTE_USER : Is for using REMOTE_USER from web server
32 | # AUTH_OAUTH : Is for OAuth
33 | AUTH_TYPE = AUTH_DB
34 |
35 | # Uncomment to setup Full admin role name
36 | # AUTH_ROLE_ADMIN = 'Admin'
37 |
38 | # Uncomment and set to desired role to enable access without authentication
39 | AUTH_ROLE_PUBLIC = 'Admin'
40 |
41 | # Will allow user self registration
42 | # AUTH_USER_REGISTRATION = True
43 |
44 | # Recaptcha is automatically enabled when user self-registration is active and the keys are provided
45 | # RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY
46 | # RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY
47 |
48 | # Config for Flask-Mail necessary for user self registration
49 | # MAIL_SERVER = 'smtp.gmail.com'
50 | # MAIL_USE_TLS = True
51 | # MAIL_USERNAME = 'yourappemail@gmail.com'
52 | # MAIL_PASSWORD = 'passwordformail'
53 | # MAIL_DEFAULT_SENDER = 'sender@gmail.com'
54 |
55 | # The default user self registration role
56 | # AUTH_USER_REGISTRATION_ROLE = "Public"
57 |
58 | # When using OAuth Auth, uncomment to setup provider(s) info
59 | # Google OAuth example:
60 | # OAUTH_PROVIDERS = [{
61 | # 'name':'google',
62 | # 'token_key':'access_token',
63 | # 'icon':'fa-google',
64 | # 'remote_app': {
65 | # 'api_base_url':'https://www.googleapis.com/oauth2/v2/',
66 | # 'client_kwargs':{
67 | # 'scope': 'email profile'
68 | # },
69 | # 'access_token_url':'https://accounts.google.com/o/oauth2/token',
70 | # 'authorize_url':'https://accounts.google.com/o/oauth2/auth',
71 | # 'request_token_url': None,
72 | # 'client_id': GOOGLE_KEY,
73 | # 'client_secret': GOOGLE_SECRET_KEY,
74 | # }
75 | # }]
76 |
77 | # When using LDAP Auth, setup the ldap server
78 | # AUTH_LDAP_SERVER = "ldap://ldapserver.new"
79 |
80 | # When using OpenID Auth, uncomment to setup OpenID providers.
81 | # example for OpenID authentication
82 | # OPENID_PROVIDERS = [
83 | # { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' },
84 | # { 'name': 'AOL', 'url': 'http://openid.aol.com/' },
85 | # { 'name': 'Flickr', 'url': 'http://www.flickr.com/' },
86 | # { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }]
87 |
88 | # ----------------------------------------------------
89 | # Theme CONFIG
90 | # ----------------------------------------------------
91 | # Flask App Builder comes up with a number of predefined themes
92 | # that you can use for Apache Airflow.
93 | # http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes
94 | # Please make sure to remove "navbar_color" configuration from airflow.cfg
95 | # in order to fully utilize the theme. (or use that property in conjunction with theme)
96 | # APP_THEME = "bootstrap-theme.css" # default bootstrap
97 | # APP_THEME = "amelia.css"
98 | # APP_THEME = "cerulean.css"
99 | # APP_THEME = "cosmo.css"
100 | # APP_THEME = "cyborg.css"
101 | # APP_THEME = "darkly.css"
102 | # APP_THEME = "flatly.css"
103 | # APP_THEME = "journal.css"
104 | # APP_THEME = "lumen.css"
105 | # APP_THEME = "paper.css"
106 | # APP_THEME = "readable.css"
107 | # APP_THEME = "sandstone.css"
108 | # APP_THEME = "simplex.css"
109 | # APP_THEME = "slate.css"
110 | # APP_THEME = "solar.css"
111 | # APP_THEME = "spacelab.css"
112 | # APP_THEME = "superhero.css"
113 | # APP_THEME = "united.css"
114 | # APP_THEME = "yeti.css"
115 |
--------------------------------------------------------------------------------
/tests/resources/airflow/dags/dbt_mesh_workflow.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from airflow import DAG
4 | from airflow.utils.dates import days_ago
5 |
6 | from opendbt.airflow import OpenDbtAirflowProject
7 |
8 | default_args = {
9 | 'owner': 'airflow',
10 | 'depends_on_past': False,
11 | 'email_on_failure': False,
12 | 'email_on_retry': False,
13 | 'retries': 1
14 | }
15 |
16 | with DAG(
17 | dag_id='dbt_mesh_workflow',
18 | default_args=default_args,
19 | description='DAG To run multiple dbt projects',
20 | schedule_interval=None,
21 | start_date=days_ago(3),
22 | catchup=False,
23 | max_active_runs=1
24 | ) as dag:
25 | DBT_PROJ_DIR = Path("/opt/dbtfinance")
26 |
27 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev')
28 | p.load_dbt_tasks(dag=dag, include_singular_tests=True, include_dbt_seeds=True)
29 |
--------------------------------------------------------------------------------
/tests/resources/airflow/dags/dbt_tests_workflow.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from airflow import DAG
4 | from airflow.operators.empty import EmptyOperator
5 | from airflow.utils.dates import days_ago
6 |
7 | from opendbt.airflow import OpenDbtAirflowProject
8 |
9 | default_args = {
10 | 'owner': 'airflow',
11 | 'depends_on_past': False,
12 | 'email_on_failure': False,
13 | 'email_on_retry': False,
14 | 'retries': 1
15 | }
16 |
17 | with DAG(
18 | dag_id='dbt_tests_workflow',
19 | default_args=default_args,
20 | description='DAG To run dbt tests',
21 | schedule_interval=None,
22 | start_date=days_ago(3),
23 | catchup=False,
24 | max_active_runs=1
25 | ) as dag:
26 | start = EmptyOperator(task_id="start")
27 | end = EmptyOperator(task_id="end")
28 |
29 | DBT_PROJ_DIR = Path("/opt/dbtcore")
30 |
31 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev')
32 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end, resource_type='test')
33 |
--------------------------------------------------------------------------------
/tests/resources/airflow/dags/dbt_workflow.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from airflow import DAG
4 | from airflow.operators.empty import EmptyOperator
5 | from airflow.utils.dates import days_ago
6 |
7 | from opendbt.airflow import OpenDbtAirflowProject
8 |
9 | default_args = {
10 | 'owner': 'airflow',
11 | 'depends_on_past': False,
12 | 'email_on_failure': False,
13 | 'email_on_retry': False,
14 | 'retries': 1
15 | }
16 |
17 | with DAG(
18 | dag_id='dbt_workflow',
19 | default_args=default_args,
20 | description='DAG To run dbt',
21 | schedule_interval=None,
22 | start_date=days_ago(3),
23 | catchup=False,
24 | max_active_runs=1
25 | ) as dag:
26 | start = EmptyOperator(task_id="start")
27 | end = EmptyOperator(task_id="end")
28 |
29 | DBT_PROJ_DIR = Path("/opt/dbtcore")
30 |
31 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev')
32 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end, include_singular_tests=True, include_dbt_seeds=True)
33 |
--------------------------------------------------------------------------------
/tests/resources/airflow/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '2'
2 |
3 | services:
4 | airflow:
5 | build:
6 | dockerfile: tests/resources/airflow/Dockerfile
7 | # NOTE The path can be absolute or relative.
8 | # If it is relative, it is resolved from the Compose file's parent folder.
9 | context: ./../../../
10 | image: opendbt_airflow
11 | entrypoint: airflow standalone
12 | volumes:
13 | # NOTE The path can be absolute or relative.
14 | - ./airflow/webserver_config.py:/opt/airflow/webserver_config.py
15 | - ./airflow/airflow.cfg:/opt/airflow/airflow.cfg
16 | - ./dags:/opt/airflow/dags:rw
17 | - ./plugins:/opt/airflow/plugins:rw
18 | - ./../dbtcore:/opt/dbtcore:rw
19 | - ./../dbtfinance:/opt/dbtfinance:rw
20 | - ./../../../opendbt/macros:/opt/dbtcore/macros:rw
21 | environment:
22 | - AIRFLOW__WEBSERVER__INSTANCE_NAME=LOCAL
23 | - AIRFLOW_ENVIRONMENT=LOCAL
24 | ports:
25 | - "8080"
26 |
--------------------------------------------------------------------------------
/tests/resources/airflow/plugins/airflow_dbtdocs_page.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from opendbt.airflow import plugin
4 |
5 | # create public page on airflow server to serve DBT docs
6 | airflow_dbtdocs_page = plugin.init_plugins_dbtdocs_page(Path("/opt/dbtcore/target"))
7 |
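8 | # NOTE: docker-compose mounts tests/resources/dbtcore at /opt/dbtcore; `dbt docs generate`
9 | # writes its artifacts (index.html, manifest.json, catalog.json) to /opt/dbtcore/target,
10 | # which is the directory this docs page is pointed at, so run docs generate to populate it first.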
--------------------------------------------------------------------------------
/tests/resources/dbtcore/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | target/
3 | dbt_packages/
4 | logs/
5 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/.sqlfluff:
--------------------------------------------------------------------------------
1 | [sqlfluff]
2 | templater = dbt
3 | dialect = duckdb
4 | # Using the dbt templater (instead of jinja) makes linting slower because
5 | # SQLFluff first compiles the dbt project into warehouse SQL before linting.
6 | runaway_limit = 1000
7 | max_line_length = 180
8 | indent_unit = space
9 |
10 | [sqlfluff:indentation]
11 | tab_space_size = 4
12 |
13 | [sqlfluff:layout:type:comma]
14 | spacing_before = touch
15 | line_position = trailing
16 |
17 | # For rule-specific configuration in a .sqlfluff file, use colon-separated
18 | # section names such as [sqlfluff:rules:capitalisation.keywords] (see below).
19 | # The dotted [tool.sqlfluff.*] syntax only applies when configuring SQLFluff
20 | # from pyproject.toml; the equivalent keyword-capitalisation setting is
21 | # defined in the sections below instead.
22 |
23 | # The default configuration for capitalisation rules is "consistent"
24 | # which will auto-detect the setting from the rest of the file. This
25 | # is less desirable in a new project and you may find this (slightly
26 | # more strict) setting more useful.
27 | # Typically we find users rely on syntax highlighting rather than
28 | # capitalisation to distinguish between keywords and identifiers.
29 | # Clearly, if your organisation has already settled on uppercase
30 | # formatting for any of these syntax elements then set them to "upper".
31 | # See https://stackoverflow.com/questions/608196/why-should-i-capitalize-my-sql-keywords-is-there-a-good-reason
32 | [sqlfluff:rules:capitalisation.keywords]
33 | capitalisation_policy = upper
34 | [sqlfluff:rules:capitalisation.identifiers]
35 | capitalisation_policy = upper
36 | [sqlfluff:rules:capitalisation.functions]
37 | extended_capitalisation_policy = upper
38 | # [sqlfluff:rules:capitalisation.literals]
39 | # capitalisation_policy = lower
40 | [sqlfluff:rules:capitalisation.types]
41 | extended_capitalisation_policy = upper
42 |
43 | [sqlfluff:rules:aliasing.table]
44 | aliasing = explicit
45 |
46 | [sqlfluff:rules:aliasing.column]
47 | aliasing = explicit
48 |
49 | [sqlfluff:rules:aliasing.expression]
50 | allow_scalar = False
51 |
52 | [sqlfluff:rules:ambiguous.column_references] # Number in group by
53 | group_by_and_order_by_style = implicit
--------------------------------------------------------------------------------
/tests/resources/dbtcore/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: 'dbtcore'
2 | version: '1.0.0'
3 |
4 | profile: 'dbtcore'
5 |
6 | # include opendbt macros
7 | macro-paths: [ "macros", "../../../opendbt/macros/" ]
8 | # use opendbt index.html for docs
9 | docs-paths: [ "../../../opendbt/docs/" ]
10 |
11 | clean-targets:
12 | - "target"
13 | - "dbt_packages"
14 | - "logs"
15 |
16 | models:
17 | dbtcore:
18 | +materialized: table
19 |
20 | vars:
21 | dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom
22 | dbt_callbacks: opendbt.examples.email_dbt_test_callback
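23 | # the vars above are read by opendbt at runtime: dbt_custom_adapter names the adapter class
24 | # to load and dbt_callbacks the callback function(s) to invoke (both live in opendbt/examples.py)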
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_core_table1.sql:
--------------------------------------------------------------------------------
1 | with source_data as (
2 | select 1 as id, 'row1' as row_data
3 | union all
4 | select 2 as id, 'row1' as row_data
5 | )
6 |
7 | SELECT *
8 | FROM source_data
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_executedlt_model.py:
--------------------------------------------------------------------------------
1 | import dlt
2 | from dlt.pipeline import TPipeline
3 |
4 |
5 | @dlt.resource(
6 | columns={"event_tstamp": {"data_type": "timestamp", "precision": 3}},
7 | primary_key="event_id",
8 | )
9 | def events():
10 | yield [{"event_id": 1, "event_tstamp": "2024-07-30T10:00:00.123"},
11 |            {"event_id": 2, "event_tstamp": "2025-03-30T10:00:00.321"}]
12 |
13 |
14 | def model(dbt, pipeline: TPipeline):
15 |     """
16 |     Execute a dlt pipeline from within a dbt Python model.
17 |     :param dbt: dbt model context
18 |     :param pipeline: pre-configured dlt pipeline; the destination connection and dataset are pre-set from the model config
19 |     :return: None
20 |     """
21 | dbt.config(materialized="executedlt")
22 | print("========================================================")
23 | print(f"INFO: DLT Pipeline pipeline_name:{pipeline.pipeline_name}")
24 | print(f"INFO: DLT Pipeline dataset_name:{pipeline.dataset_name}")
25 |     print(f"INFO: DLT Pipeline:{pipeline}")
26 | print(f"INFO: DLT Pipeline staging:{pipeline.staging}")
27 | print(f"INFO: DLT Pipeline destination:{pipeline.destination}")
28 | print(f"INFO: DLT Pipeline _pipeline_storage:{pipeline._pipeline_storage}")
29 | print(f"INFO: DLT Pipeline _schema_storage:{pipeline._schema_storage}")
30 | print(f"INFO: DLT Pipeline state:{pipeline.state}")
31 | print(f"INFO: DBT this:{dbt.this}")
32 | print("========================================================")
33 |     load_info = pipeline.run(events(), table_name=str(str(dbt.this).split('.')[-1]).strip('"'))  # table name = last component of dbt.this, with quotes stripped
34 | print(load_info)
35 | row_counts = pipeline.last_trace.last_normalize_info
36 | print(row_counts)
37 | print("========================================================")
38 | return None
39 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_executepython_dlt_model.py:
--------------------------------------------------------------------------------
1 | import dlt
2 |
3 |
4 | @dlt.resource(
5 | columns={"event_tstamp": {"data_type": "timestamp", "precision": 3}},
6 | primary_key="event_id",
7 | )
8 | def events():
9 | yield [{"event_id": 1, "event_tstamp": "2024-07-30T10:00:00.123"},
10 |            {"event_id": 2, "event_tstamp": "2025-03-30T10:00:00.321"}]
11 |
12 |
13 | def model(dbt, session):
14 | dbt.config(materialized="executepython")
15 | print("========================================================")
16 | print(f"INFO: DLT Version:{dlt.version.__version__}")
17 | print(f"INFO: DBT Duckdb Session:{type(session)}")
18 | print(f"INFO: DBT Duckdb Connection:{type(session._env.conn)}")
19 | print("========================================================")
20 | p = dlt.pipeline(
21 | pipeline_name="dbt_dlt",
22 | destination=dlt.destinations.duckdb(session._env.conn),
23 | dataset_name=dbt.this.schema,
24 | dev_mode=False,
25 | )
26 | load_info = p.run(events())
27 | print(load_info)
28 | row_counts = p.last_trace.last_normalize_info
29 | print(row_counts)
30 | print("========================================================")
31 | return None
32 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_executepython_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 |
4 | from dbt import version
5 |
6 |
7 | def print_info():
8 | _str = f"name:{os.name}, system:{platform.system()} release:{platform.release()}"
9 | _str += f"\npython version:{platform.python_version()}, dbt:{version.__version__}"
10 | print(_str)
11 |
12 |
13 | def model(dbt, session):
14 | dbt.config(materialized="executepython")
15 | print("==================================================")
16 | print("========IM LOCALLY EXECUTED PYTHON MODEL==========")
17 | print("==================================================")
18 | print_info()
19 | print("==================================================")
20 | print("===============MAKE DBT GREAT AGAIN===============")
21 | print("==================================================")
22 | return None
23 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_executesql_dbt_model.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized='executesql') }}
2 |
3 |
4 | create or replace table my_execute_dbt_model
5 | as
6 |
7 | select 123 as column1
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_failing_dbt_model.sql:
--------------------------------------------------------------------------------
1 | select non_exists_column as my_failing_column
2 | from {{ ref('my_first_dbt_model') }}
3 | where id = 1
4 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_first_dbt_model.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized='table') }}
2 |
3 | with source_data as (
4 | select 1 as id, 'test-value' as data_value, 'test-value' as column_3
5 | union all
6 | select 1 as id, 'test-value' as data_value, 'test-value' as column_3
7 | union all
8 | select 2 as id, 'test-value' as data_value, 'test-value' as column_3
9 | union all
10 | select null as id, 'test-value' as data_value, 'test-value' as column_3
11 | )
12 | SELECT *
13 | FROM source_data
14 | -- where id is not null
15 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_second_dbt_model.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | t1.id AS pk_id,
3 | t1.data_value AS data_value1,
4 | CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2,
5 | t3.event_tstamp AS event_tstamp
6 | FROM {{ ref('my_first_dbt_model') }} AS t1
7 | LEFT JOIN {{ ref('my_core_table1') }} AS t2 ON t1.id = t2.id
8 | LEFT JOIN {{ ref('my_executedlt_model') }} AS t3 ON t1.id = t3.event_id
9 | WHERE t1.id IN (1, 2)
10 |
--------------------------------------------------------------------------------
/tests/resources/dbtcore/models/schema.yml:
--------------------------------------------------------------------------------
1 |
2 | version: 2
3 |
4 | models:
5 | - name: my_first_dbt_model
6 | description: >
7 | # A starter dbt model
8 |
9 | this is a __sample__ model used as an example
10 | columns:
11 | - name: data_value
12 | - name: column_3
13 | - name: id
14 | description: "The **primary key** for this table"
15 | tests:
16 | - unique:
17 | config:
18 | severity: error
19 | error_if: ">1000"
20 | warn_if: ">0"
21 | - not_null:
22 | config:
23 | severity: error
24 | error_if: ">1000"
25 | warn_if: ">0"
26 |
27 | - name: my_second_dbt_model
28 | description: "A starter dbt model"
29 | columns:
30 | - name: pk_id
31 | description: "The primary key for this table"
32 | data_tests:
33 | - unique
34 | - not_null
35 | - name: data_value1
36 | - name: data_value2
37 | - name: event_tstamp
38 | - name: my_core_table1
39 | columns:
40 | - name: id
41 | - name: row_data
42 | - name: my_executedlt_model
43 | columns:
44 | - name: event_id
45 | - name: event_tstamp
46 | - name: my_executepython_model
47 | columns:
48 | - name: event_id
49 | - name: event_tstamp
--------------------------------------------------------------------------------
/tests/resources/dbtcore/profiles.yml:
--------------------------------------------------------------------------------
1 | dbtcore:
2 | outputs:
3 | dev:
4 | type: duckdb
5 | schema: core
6 | path: ./../dev.duckdb
7 | threads: 1
8 |
9 | target: dev
10 |
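11 | # NOTE: tests/resources/dbtfinance/profiles.yml points at the same ./../dev.duckdb file,
12 | # so both test projects share one DuckDB database and cross-project refs resolve there.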
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | target/
3 | dbt_packages/
4 | logs/
5 |
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: 'dbtfinance'
2 | version: '1.0.0'
3 |
4 | profile: 'dbtfinance'
5 |
6 | # directories to be removed by `dbt clean`
7 | clean-targets:
8 | - "target"
9 | - "dbt_packages"
10 | - "logs"
11 |
12 | models:
13 |   # ensure models referenced from the dbtcore project resolve to their correct schema
14 | dbtcore:
15 | schema: "core"
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/dependencies.yml:
--------------------------------------------------------------------------------
1 | #packages:
2 | # - package: dbt-labs/dbt_utils
3 | # version: 1.1.1
4 |
5 | # project names are case-sensitive and must match the 'name' in that project's dbt_project.yml
6 | projects:
7 | - name: dbtcore
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/macros/generate_schema_name.sql:
--------------------------------------------------------------------------------
1 | {% macro generate_schema_name(custom_schema_name, node) -%}
2 |
3 | {%- set default_schema = target.schema -%}
4 | {%- if custom_schema_name is none -%}
5 |
6 | {{ default_schema }}
7 |
8 | {%- else -%}
9 |
10 |     {# Override of the default `generate_schema_name` macro, which would concatenate
11 |        the default schema with the custom schema name; here the custom schema name
12 |        is used on its own. #}
13 | {{ custom_schema_name | trim }}
14 |
15 | {%- endif -%}
16 |
17 | {%- endmacro %}
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/models/my_cross_project_ref_model.sql:
--------------------------------------------------------------------------------
1 |
2 | select
3 | id,
4 | row_data,
5 | count(*) as num_rows
6 | from {{ ref('dbtcore', 'my_core_table1') }}
7 | -- fake second dependency: source() is rendered by jinja even inside a SQL comment, so it is added to this model's lineage {{ source('core', 'my_executepython_model') }}
8 | group by 1,2
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/models/sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | # defining `dbtcore` project models as source!
5 | - name: core
6 | schema: core
7 | tables:
8 | - name: my_executepython_model
9 | - name: my_executepython_dlt_model
--------------------------------------------------------------------------------
/tests/resources/dbtfinance/profiles.yml:
--------------------------------------------------------------------------------
1 | dbtfinance:
2 | outputs:
3 | dev:
4 | type: duckdb
5 | schema: finance
6 | path: ./../dev.duckdb
7 | threads: 1
8 |
9 | target: dev
10 |
--------------------------------------------------------------------------------
/tests/test_airflow.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | from pathlib import Path
4 | from time import sleep
5 |
6 | from testcontainers.compose import DockerCompose
7 |
8 |
9 | @unittest.skip("Manual test")
10 | class TestAirflowBase(unittest.TestCase):
11 |     """
12 |     Test class for Airflow integration tests.
13 |     Builds the Airflow docker image and mounts the current code into it.
14 |     Login is disabled and the webserver is public, so all users access the UI as Admin.
15 |     """
16 | _compose: DockerCompose = None
17 | resources_dir = Path(__file__).parent.joinpath('resources')
18 |
19 | @classmethod
20 | def setUpClass(cls):
21 | os.chdir(cls.resources_dir.joinpath('airflow').as_posix())
22 | cls._compose = DockerCompose(cls.resources_dir.joinpath('airflow').as_posix(),
23 | compose_file_name="docker-compose.yaml",
24 | # build=True,
25 | docker_command_path='podman'
26 | )
27 | cls._compose.stop()
28 | cls._compose.start()
29 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/home")
30 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/dbtdocs")
31 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/dbtdocs/perf_info.json")
32 |
33 | @classmethod
34 | def tearDownClass(cls):
35 | print("Running tearDownClass")
36 | if cls._compose:
37 | cls._compose.stop()
38 |
39 | def __exit__(self, exc_type, exc_val, traceback):
40 | if self._compose:
41 | self._compose.stop()
42 |
43 | def test_start_airflow_local_and_wait(self):
44 |         """
45 |         Starts a local Airflow instance in docker with the current code mounted; UI login is disabled and public.
46 |         Useful for running local Airflow with new code changes and checking them in the Airflow UI:
47 |         while the container is running, code changes are picked up by Airflow after a short time.
48 |         :return:
49 |         """
50 | sleep(99999999)
51 |
--------------------------------------------------------------------------------
/tests/test_catalog.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from base_dbt_test import BaseDbtTest
4 | from opendbt import OpenDbtProject
5 | from opendbt.catalog import OpenDbtCatalog
6 |
7 |
8 | class TestOpenDbtCatalog(BaseDbtTest):
9 |
10 | def test_catalog_loading(self):
11 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
12 | dp.run(command="docs", args=['generate'])
13 | catalog = OpenDbtCatalog(
14 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'),
15 | catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json'))
16 | self.assertIn("model.dbtfinance.my_cross_project_ref_model", catalog.nodes.keys())
17 | self.assertIn("model.dbtcore.my_core_table1", catalog.nodes.keys())
18 | # print(extractor.nodes.get("model.dbtcore.my_core_table1").columns)
19 | model1 = catalog.nodes.get("model.dbtfinance.my_cross_project_ref_model")
20 | model1_schema = model1.db_schema_dict(include_parents=True)
21 | self.assertIn("dev", model1_schema)
22 | self.assertIn("finance", model1_schema["dev"])
23 | self.assertIn("my_core_table1", model1_schema["dev"]["core"])
24 | self.assertIn("my_cross_project_ref_model", model1_schema["dev"]["finance"])
25 | # self.assertIn("row_data", model1_schema["dev"]["main"]['my_core_table1'])
26 |
27 | self.assertIn("num_rows", model1.populate_lineage(catalog.tables2nodes))
28 | self.assertIn("row_data", model1.populate_lineage(catalog.tables2nodes))
29 |
30 | @unittest.skip("reason for skipping")
31 | def test_catalog_export(self):
32 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
33 | dp.run(command="compile")
34 | dp.run(command="run", args=['--select', '+my_second_dbt_model'])
35 | dp.run(command="docs", args=['generate'])
36 | catalog = OpenDbtCatalog(
37 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'),
38 | catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json'))
39 | catalog.export()
40 |
41 | def test_catalog_export_one_node(self):
42 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
43 | dp.run(command="compile")
44 | dp.run(command="run", args=['--select', '+my_second_dbt_model'])
45 | dp.run(command="docs", args=['generate'])
46 | catalog = OpenDbtCatalog(
47 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'),
48 | catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json'))
49 | node = catalog.node(node_id="model.dbtcore.my_second_dbt_model")
50 | result = node.parent_db_schema_dict()
51 | self.assertIn("my_first_dbt_model", result["dev"]["core"])
52 | self.assertIn("column_3", result["dev"]["core"]["my_first_dbt_model"])
53 |
--------------------------------------------------------------------------------
/tests/test_custom_adapter.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from packaging.version import Version
4 |
5 | from base_dbt_test import BaseDbtTest
6 | from opendbt import OpenDbtProject
7 |
8 |
9 | class TestOpenDbtProject(BaseDbtTest):
10 |
11 | def test_run_with_custom_adapter(self):
12 | if Version(self.DBT_VERSION.to_version_string(skip_matcher=True)) > Version("1.8.0"):
13 | dbt_custom_adapter = 'opendbt.examples.DuckDBAdapterTestingOnlyDbt18'
14 | else:
15 | dbt_custom_adapter = 'opendbt.examples.DuckDBAdapterTestingOnlyDbt17'
16 |
17 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
18 | args=['--vars', f"{{'dbt_custom_adapter': '{dbt_custom_adapter}'}}"])
19 | with self.assertRaises(Exception) as context:
20 | sys.tracebacklimit = 0
21 | dp.run(command="compile")
22 | self.assertTrue("Custom user defined test adapter activated" in str(context.exception))
23 | with self.assertRaises(Exception) as context:
24 | sys.tracebacklimit = 0
25 | dp.run(command="compile")
26 | self.assertTrue("Custom user defined test adapter activated" in str(context.exception))
27 |
28 |     def test_run_with_custom_adapter_module_not_found(self):
29 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
30 | args=['--vars', '{dbt_custom_adapter: not.exits.module.MyDbtTestAdapterV1}']
31 | )
32 | with self.assertRaises(Exception) as context:
33 | sys.tracebacklimit = 0
34 | dp.run(command="compile")
35 | self.assertTrue("Module of provided adapter not found" in str(context.exception))
36 |
37 | def test_run_with_custom_adapter_class_not_found(self):
38 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
39 | args=['--vars', '{dbt_custom_adapter: test_custom_adapter.NotExistsAdapterClass}']
40 | )
41 | with self.assertRaises(Exception) as context:
42 | sys.tracebacklimit = 0
43 | dp.run(command="compile")
44 |         self.assertTrue("has no attribute 'NotExistsAdapterClass'" in str(context.exception))
45 |
46 | def test_run_with_custom_adapter_wrong_name(self):
47 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
48 | args=['--vars', 'dbt_custom_adapter: test_custom_adapterMyDbtTestAdapterV1']
49 | )
50 | with self.assertRaises(Exception) as context:
51 | sys.tracebacklimit = 0
52 | dp.run(command="compile")
53 | self.assertTrue("Unexpected adapter class name" in str(context.exception))
54 |
--------------------------------------------------------------------------------
/tests/test_dbt_docs.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import semver
4 |
5 | from base_dbt_test import BaseDbtTest
6 | from opendbt import OpenDbtProject
7 |
8 |
9 | class TestDbtDocs(BaseDbtTest):
10 |
11 | def test_run_docs_generate(self):
12 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
13 | # run to generate run_results.json and run_info.json file
14 | try:
15 | dp.run(command="build")
16 |         except Exception:
17 |             pass  # the build is expected to fail because the project contains my_failing_dbt_model
18 | dp.run(command="docs", args=['generate'])
19 | self.assertTrue(self.DBTCORE_DIR.joinpath('target/catalogl.json').exists())
20 | if semver.Version.parse(self.DBT_VERSION_STR) >= semver.Version.parse("1.8.0"):
21 | self.assertTrue(self.DBTCORE_DIR.joinpath('target/run_info.json').exists())
22 |
23 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
24 | # dp.run(command="run")
25 | dp.run(command="docs", args=['generate'])
26 | index_html = self.DBTFINANCE_DIR.joinpath('target/index.html').read_text()
27 | # new html docs page
28 | self.assertTrue("tailwindcss" in str(index_html))
29 | self.assertTrue("vue.global.min.js" in str(index_html))
30 | self.assertTrue(self.DBTFINANCE_DIR.joinpath('target/catalogl.json').exists())
31 |
32 | @unittest.skip("reason for skipping")
33 | def test_run_docs_serve(self):
34 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
35 | dp.run(command="docs", args=['generate'])
36 | dp.run(command="docs", args=['serve'])
37 |
--------------------------------------------------------------------------------
/tests/test_dbt_sqlfluff.py:
--------------------------------------------------------------------------------
1 | from base_dbt_test import BaseDbtTest
2 | from opendbt import OpenDbtProject
3 |
4 |
5 | class TestDbtSqlFluff(BaseDbtTest):
6 |
7 | def test_run_sqlfluff_lint(self):
8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
9 | dp.run(command="sqlfluff", args=['fix'])
10 | dp.run(command="sqlfluff", args=['lint'])
11 |
12 | def test_run_sqlfluff_fix(self):
13 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
14 | dp.run(command="sqlfluff", args=['fix'])
15 |
--------------------------------------------------------------------------------
/tests/test_executedlt_materialization.py:
--------------------------------------------------------------------------------
1 | from base_dbt_test import BaseDbtTest
2 | from opendbt import OpenDbtProject
3 |
4 |
5 | class TestOpenDbtProject(BaseDbtTest):
6 |
7 | def test_run_executedlt_materialization(self):
8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
9 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom'])
10 | dp.run(command="run", args=['--select', 'my_executedlt_model'])
11 |
--------------------------------------------------------------------------------
/tests/test_executepython_materialization.py:
--------------------------------------------------------------------------------
1 | from base_dbt_test import BaseDbtTest
2 | from opendbt import OpenDbtProject
3 |
4 |
5 | class TestOpenDbtProject(BaseDbtTest):
6 |
7 | def test_run_executepython_materialization(self):
8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
9 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom'])
10 | dp.run(command="run", args=['--select', 'my_executepython_model'])
11 |
12 | def test_run_executepython_dlt_pipeline(self):
13 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
14 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom'])
15 | dp.run(command="run", args=['--select', 'my_executepython_dlt_model'])
16 |
17 | def test_run_executepython_materialization_subprocess(self):
18 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR,
19 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom'])
20 | dp.run(command="run", args=['--select', 'my_executepython_model'], use_subprocess=True)
21 |
--------------------------------------------------------------------------------
/tests/test_executesql_materialization.py:
--------------------------------------------------------------------------------
1 | from base_dbt_test import BaseDbtTest
2 | from opendbt import OpenDbtProject
3 |
4 |
5 | class TestOpenDbtProject(BaseDbtTest):
6 |
7 | def test_run_executesql_materialization(self):
8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
9 | dp.run(command="run", args=['--select', 'my_executesql_dbt_model'])
10 |
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from pathlib import Path
4 | from unittest.mock import patch, Mock
5 |
6 | from base_dbt_test import BaseDbtTest
7 | from opendbt.__main__ import main
8 |
9 |
10 | class TestOpenDbtCliMain(BaseDbtTest):
11 |
12 | @patch("opendbt.__main__.OpenDbtCli")
13 | def test_main_with_project_dir_arg(self, mock_cli):
14 | test_project_dir = self.DBTFINANCE_DIR.resolve()
15 | test_profiles_dir = self.DBTFINANCE_DIR.resolve()
16 | sys.argv = ["main.py", "--project-dir", str(test_project_dir), "--profiles-dir", str(test_profiles_dir), "ls"]
17 | mock_instance = Mock(project_dir=test_project_dir, profiles_dir=test_profiles_dir)
18 | mock_cli.return_value = mock_instance
19 | main()
20 | mock_cli.assert_called_once_with(project_dir=test_project_dir, profiles_dir=test_profiles_dir)
21 | mock_instance.invoke.assert_called_once_with(args=['ls'])
22 |
--------------------------------------------------------------------------------
/tests/test_opendbt_airflow.py:
--------------------------------------------------------------------------------
1 | from airflow import DAG
2 | from airflow.utils.dates import days_ago
3 |
4 | from base_dbt_test import BaseDbtTest
5 | from opendbt.airflow import OpenDbtAirflowProject
6 |
7 |
8 | class TestOpenDbtProject(BaseDbtTest):
9 |
10 | def get_dag(self):
11 | return DAG(
12 | dag_id='dbt_test_workflow',
13 | schedule_interval=None,
14 | start_date=days_ago(3),
15 | catchup=False,
16 | max_active_runs=1
17 | )
18 |
19 | def test_run_dbt_as_airflow_task(self):
20 | with self.get_dag() as dag:
21 | # load dbt jobs to airflow dag
22 | p = OpenDbtAirflowProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, target='dev')
23 | p.load_dbt_tasks(dag=dag,
24 | include_singular_tests=True,
25 | include_dbt_seeds=True)
26 |
27 | for j in dag.tasks:
28 | if 'my_first_dbt_model' in j.task_id:
29 | j.execute({})
30 | if 'my_executedlt_model' in j.task_id:
31 | j.execute({})
32 | if 'my_executepython_model' in j.task_id:
33 | j.execute({})
34 |
--------------------------------------------------------------------------------
/tests/test_opendbt_cli.py:
--------------------------------------------------------------------------------
1 | import json
2 | import unittest
3 |
4 | import semver
5 | from dbt.exceptions import DbtRuntimeError
6 |
7 | from base_dbt_test import BaseDbtTest
8 | from opendbt import OpenDbtProject, OpenDbtCli
9 | from opendbt.examples import email_dbt_test_callback
10 |
11 |
12 | class TestOpenDbtCli(BaseDbtTest):
13 |
14 | def test_run_failed(self):
15 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
16 | with self.assertRaises(DbtRuntimeError) as context:
17 | dp.run(command="run", args=['--select', '+my_failing_dbt_model'])
18 |
19 | self.assertIn('Referenced column "non_exists_column" not found in FROM clause', str(context.exception.msg))
20 |
21 | def test_cli_attributes(self):
22 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR)
23 | self.assertEqual(dp.project.project_name, "dbtcore")
24 | self.assertEqual(dp.project.profile_name, "dbtcore")
25 | self.assertIn('dbt_custom_adapter', dp.project_vars)
26 | self.assertIn('dbt_callbacks', dp.project_vars)
27 | self.assertEqual(dp.project_vars['dbt_custom_adapter'], 'opendbt.examples.DuckDBAdapterV2Custom')
28 |
29 | def test_cli_callbacks(self):
30 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR)
31 | self.assertIn(email_dbt_test_callback, dp.project_callbacks)
32 |
33 | with self.assertLogs('dbtcallbacks', level='INFO') as cm:
34 | try:
35 | dp.invoke(args=["test", '--select', 'my_core_table1 my_first_dbt_model', "--profiles-dir",
36 | dp.project_dir.as_posix()])
37 |             except Exception:
38 |                 pass  # test failures are expected here; the assertions below only check the callback log output
39 |
40 | self.assertIn('DBT callback `email_dbt_test_callback` called', str(cm.output))
41 | self.assertIn('Callback email sent', str(cm.output))
42 | # self.assertIn('dbt test', str(cm.output))
43 |
44 | def test_cli_run_models(self):
45 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR)
46 | dp.invoke(args=['run', "--exclude", "my_failing_dbt_model", "--profiles-dir", dp.project_dir.as_posix()])
47 |
48 | def test_cli_run_cross_project_ref_models(self):
49 | dpf = OpenDbtCli(project_dir=self.DBTFINANCE_DIR)
50 | dpf.invoke(
51 | args=['run', '--select', '+my_cross_project_ref_model', "--profiles-dir", dpf.project_dir.as_posix()])
52 |
53 | @unittest.skipIf(semver.Version.parse(BaseDbtTest.DBT_VERSION_STR) < semver.Version.parse("1.8.0"), 'skip')
54 | def test_cli_run_result(self):
55 | run_info = self.DBTCORE_DIR.joinpath("target/run_info.json")
56 | if run_info.exists():
57 | run_info.write_text('')
58 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
59 | dp.run(command="build", args=['--select', 'my_core_table1'])
60 | data = json.loads(run_info.read_text())
61 | self.assertEqual(1, len(data['nodes']))
62 | self.assertIn("model.dbtcore.my_core_table1", data['nodes'])
63 | print(json.dumps(data, indent=4))
64 |
65 | dp.run(command="build", args=['--select', 'my_executesql_dbt_model'])
66 | data = json.loads(run_info.read_text())
67 | self.assertEqual(2, len(data['nodes']))
68 | self.assertIn("model.dbtcore.my_executesql_dbt_model", data['nodes'])
69 | print(json.dumps(data, indent=4))
70 |
--------------------------------------------------------------------------------
/tests/test_opendbt_mesh.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from base_dbt_test import BaseDbtTest
4 | from opendbt import OpenDbtProject
5 |
6 |
7 | class TestOpenDbtMesh(BaseDbtTest):
8 |
9 | def test_run_cross_project(self):
10 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
11 | dp.run(command="compile")
12 |
13 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
14 | dp.run(command="compile")
15 |
16 | manifest = json.loads(self.DBTFINANCE_DIR.joinpath("target/manifest.json").read_text())
17 | model = manifest.get("nodes").get("model.dbtfinance.my_cross_project_ref_model", {})
18 | print(model)
19 | self.assertEqual(model["database"], 'dev')
20 | self.assertEqual(model['schema'], 'finance')
21 | self.assertEqual(model['name'], 'my_cross_project_ref_model')
22 |
23 | model = manifest.get("nodes").get("model.dbtcore.my_core_table1", {})
24 | self.assertEqual(model['database'], 'dev')
25 | self.assertEqual(model['schema'], 'core')
26 | self.assertEqual(model['name'], 'my_core_table1')
27 | print(model)
28 |
--------------------------------------------------------------------------------
/tests/test_opendbt_project.py:
--------------------------------------------------------------------------------
1 | from base_dbt_test import BaseDbtTest
2 | from opendbt import OpenDbtProject
3 |
4 |
5 | class TestOpenDbtProject(BaseDbtTest):
6 | def test_run_compile(self):
7 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
8 | dp.run(command="compile")
9 |
10 | def test_run_run(self):
11 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
12 | dp.run(command="run",
13 | args=['--select', '+my_second_dbt_model+', "--exclude", "my_failing_dbt_model"],
14 | use_subprocess=True)
15 |
16 | def test_project_attributes(self):
17 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
18 | self.assertEqual(dp.project.profile_name, "dbtcore")
19 | self.assertEqual(dp.project_vars['dbt_custom_adapter'], 'opendbt.examples.DuckDBAdapterV2Custom')
20 |
--------------------------------------------------------------------------------