├── .github ├── dependabot.yml └── workflows │ ├── deploy-documentation.yml │ ├── release-test.yml │ ├── release.yml │ ├── tests-dbt-version.yml │ └── tests.yml ├── .gitignore ├── .idea ├── .gitignore ├── git_toolbox_blame.xml ├── git_toolbox_prj.xml ├── misc.xml ├── modules.xml ├── opendbt.iml ├── runConfigurations │ ├── Python_tests_in_tests.xml │ ├── pip_install.xml │ └── pylint.xml └── vcs.xml ├── .pylintrc ├── LICENSE ├── README.md ├── docs ├── assets │ ├── airflow-dbt-docs-link.png │ ├── airflow-dbt-docs-page.png │ ├── airflow-dbt-flow.png │ ├── dbt-custom-adapter-python.png │ ├── dbt-local-python.png │ ├── docs-columns-transformation.png │ ├── docs-dependencies.png │ ├── docs-lineage.png │ ├── docs-run-info-error.png │ ├── docs-run-info.png │ └── opendbt-airflow-ui.png ├── catalog.md ├── examples.md ├── index.md └── opendbtdocs │ ├── catalog.json │ ├── catalogl.json │ ├── index.html │ ├── manifest.json │ ├── run_info.json │ └── run_results.json ├── mkdocs.yml ├── opendbt ├── __init__.py ├── __main__.py ├── airflow │ ├── __init__.py │ └── plugin.py ├── catalog │ └── __init__.py ├── dbt │ ├── __init__.py │ ├── docs │ │ ├── .gitignore │ │ └── index.html │ ├── shared │ │ ├── __init__.py │ │ ├── adapters │ │ │ ├── __init__.py │ │ │ └── impl.py │ │ ├── cli │ │ │ ├── __init__.py │ │ │ └── main.py │ │ └── task │ │ │ ├── __init__.py │ │ │ └── sqlfluff.py │ ├── v17 │ │ ├── __init__.py │ │ ├── adapters │ │ │ ├── __init__.py │ │ │ └── factory.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── runtime.py │ │ └── task │ │ │ ├── __init__.py │ │ │ ├── docs │ │ │ ├── __init__.py │ │ │ └── generate.py │ │ │ └── run.py │ └── v18 │ │ ├── __init__.py │ │ ├── adapters │ │ ├── __init__.py │ │ └── factory.py │ │ ├── artifacts │ │ ├── __init__.py │ │ └── schemas │ │ │ ├── __init__.py │ │ │ └── run.py │ │ ├── config │ │ ├── __init__.py │ │ └── runtime.py │ │ └── task │ │ ├── __init__.py │ │ ├── docs │ │ ├── __init__.py │ │ └── generate.py │ │ └── run.py ├── examples.py ├── logger.py ├── macros │ ├── executedlt.sql │ ├── executepython.sql │ └── executesql.sql ├── runtime_patcher.py └── utils.py ├── pyproject.toml └── tests ├── base_dbt_test.py ├── resources ├── airflow │ ├── Dockerfile │ ├── airflow │ │ ├── airflow.cfg │ │ └── webserver_config.py │ ├── dags │ │ ├── dbt_mesh_workflow.py │ │ ├── dbt_tests_workflow.py │ │ └── dbt_workflow.py │ ├── docker-compose.yaml │ └── plugins │ │ └── airflow_dbtdocs_page.py ├── dbtcore │ ├── .gitignore │ ├── .sqlfluff │ ├── dbt_project.yml │ ├── models │ │ ├── my_core_table1.sql │ │ ├── my_executedlt_model.py │ │ ├── my_executepython_dlt_model.py │ │ ├── my_executepython_model.py │ │ ├── my_executesql_dbt_model.sql │ │ ├── my_failing_dbt_model.sql │ │ ├── my_first_dbt_model.sql │ │ ├── my_second_dbt_model.sql │ │ └── schema.yml │ └── profiles.yml └── dbtfinance │ ├── .gitignore │ ├── dbt_project.yml │ ├── dependencies.yml │ ├── macros │ └── generate_schema_name.sql │ ├── models │ ├── my_cross_project_ref_model.sql │ └── sources.yml │ └── profiles.yml ├── test_airflow.py ├── test_catalog.py ├── test_custom_adapter.py ├── test_dbt_docs.py ├── test_dbt_sqlfluff.py ├── test_executedlt_materialization.py ├── test_executepython_materialization.py ├── test_executesql_materialization.py ├── test_main.py ├── test_opendbt_airflow.py ├── test_opendbt_cli.py ├── test_opendbt_mesh.py └── test_opendbt_project.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | 
directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /.github/workflows/deploy-documentation.yml: -------------------------------------------------------------------------------- 1 | name: deploy-mkdocs-documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | - docs 8 | permissions: 9 | contents: write 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Configure Git Credentials 16 | run: | 17 | git config user.name github-actions[bot] 18 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: 3.x 22 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 23 | - run: pip install mkdocs-material 24 | - run: mkdocs gh-deploy --force -------------------------------------------------------------------------------- /.github/workflows/release-test.yml: -------------------------------------------------------------------------------- 1 | name: Create Test Pypi Release 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | if: github.repository_owner == 'memiiso' 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: [ 3.8 ] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install pypa/build 22 | run: | 23 | python -m pip install build --user 24 | - name: Build a binary wheel and a source tarball 25 | run: | 26 | python -m build --sdist --wheel --outdir dist/ . 27 | 28 | - name: Publish main to Test Pypi 29 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/heads/main') 30 | uses: pypa/gh-action-pypi-publish@release/v1 31 | with: 32 | user: __token__ 33 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 34 | repository_url: https://test.pypi.org/legacy/ 35 | skip_existing: true -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create Pypi Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | 8 | jobs: 9 | build: 10 | if: github.repository_owner == 'memiiso' 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [ 3.8 ] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install pypa/build 23 | run: | 24 | python -m pip install build --user 25 | - name: Build a binary wheel and a source tarball 26 | run: | 27 | python -m build --sdist --wheel --outdir dist/ . 
28 | 29 | - name: Publish to Pypi 30 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | user: __token__ 34 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/tests-dbt-version.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test DBT Version 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | dbt-version: 7 | required: true 8 | type: string 9 | 10 | jobs: 11 | test-dbt-version: 12 | runs-on: macos-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: [ "3.9", "3.10", "3.11", "3.12" ] 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: 'pip' # caching pip dependencies 24 | - name: Build & Install DBT ${{ inputs.dbt-version }} 25 | run: | 26 | pip install -q coverage pylint 27 | pip install -q dbt-core==${{ inputs.dbt-version }}.* dbt-duckdb==${{ inputs.dbt-version }}.* --force-reinstall --upgrade 28 | # FIX for protobuf issue: https://github.com/dbt-labs/dbt-core/issues/9759 29 | pip install -q "apache-airflow<3.0.0" "protobuf>=4.25.3,<5.0.0" "opentelemetry-proto<1.28.0" --prefer-binary 30 | pip install -q .[test] --prefer-binary 31 | pip install -q dbt-core==${{ inputs.dbt-version }}.* dbt-duckdb==${{ inputs.dbt-version }}.* --force-reinstall --upgrade 32 | python --version 33 | python -c "from dbt.version import get_installed_version as get_dbt_version;print(f'dbt version={get_dbt_version()}')" 34 | python -m compileall -f opendbt 35 | python -m pylint opendbt 36 | - name: Run Tests 37 | run: | 38 | python -c "from dbt.version import get_installed_version as get_dbt_version;print(f'dbt version={get_dbt_version()}')" 39 | python -m coverage run --source=./tests/ -m unittest discover -s tests/ 40 | python -m coverage report -m ./opendbt/*.py 41 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ main ] 7 | paths-ignore: 8 | - '.idea/**' 9 | - '.run/**' 10 | pull_request: 11 | branches: [ main ] 12 | paths-ignore: 13 | - '.idea/**' 14 | - '.run/**' 15 | 16 | jobs: 17 | test-dbt-1-7: 18 | uses: ./.github/workflows/tests-dbt-version.yml 19 | with: 20 | dbt-version: "1.7" 21 | needs: test-dbt-1-8 22 | test-dbt-1-8: 23 | uses: ./.github/workflows/tests-dbt-version.yml 24 | with: 25 | dbt-version: "1.8" 26 | needs: test-dbt-1-9 27 | test-dbt-1-9: 28 | uses: ./.github/workflows/tests-dbt-version.yml 29 | with: 30 | dbt-version: "1.9" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | **.duckdb 3 | **.user.yml 4 | reset.sh 5 | 6 | ###### JetBrains ###### 7 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 8 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 9 | 10 | # User-specific stuff 11 | .idea/**/workspace.xml 12 | .idea/**/tasks.xml 13 | .idea/**/usage.statistics.xml 14 | .idea/**/dictionaries 15 | .idea/**/shelf 16 | 17 | # AWS 
User-specific 18 | .idea/**/aws.xml 19 | 20 | # Generated files 21 | .idea/**/contentModel.xml 22 | 23 | # Sensitive or high-churn files 24 | .idea/**/dataSources/ 25 | .idea/**/dataSources.ids 26 | .idea/**/dataSources.local.xml 27 | .idea/**/sqlDataSources.xml 28 | .idea/**/dynamic.xml 29 | .idea/**/uiDesigner.xml 30 | .idea/**/dbnavigator.xml 31 | 32 | # Gradle 33 | .idea/**/gradle.xml 34 | .idea/**/libraries 35 | 36 | # Gradle and Maven with auto-import 37 | # When using Gradle or Maven with auto-import, you should exclude module files, 38 | # since they will be recreated, and may cause churn. Uncomment if using 39 | # auto-import. 40 | # .idea/artifacts 41 | # .idea/compiler.xml 42 | # .idea/jarRepositories.xml 43 | # .idea/modules.xml 44 | # .idea/*.iml 45 | # .idea/modules 46 | # *.iml 47 | # *.ipr 48 | 49 | # CMake 50 | cmake-build-*/ 51 | 52 | # Mongo Explorer plugin 53 | .idea/**/mongoSettings.xml 54 | 55 | # File-based project format 56 | *.iws 57 | 58 | # IntelliJ 59 | out/ 60 | 61 | # mpeltonen/sbt-idea plugin 62 | .idea_modules/ 63 | 64 | # JIRA plugin 65 | atlassian-ide-plugin.xml 66 | 67 | # Cursive Clojure plugin 68 | .idea/replstate.xml 69 | 70 | # SonarLint plugin 71 | .idea/sonarlint/ 72 | 73 | # Crashlytics plugin (for Android Studio and IntelliJ) 74 | com_crashlytics_export_strings.xml 75 | crashlytics.properties 76 | crashlytics-build.properties 77 | fabric.properties 78 | 79 | # Editor-based Rest Client 80 | .idea/httpRequests 81 | 82 | # Android studio 3.1+ serialized cache file 83 | .idea/caches/build_file_checksums.ser 84 | 85 | 86 | ###### Python ###### 87 | # Byte-compiled / optimized / DLL files 88 | __pycache__/ 89 | *.py[cod] 90 | *$py.class 91 | 92 | # C extensions 93 | *.so 94 | 95 | # Distribution / packaging 96 | .Python 97 | build/ 98 | develop-eggs/ 99 | dist/ 100 | downloads/ 101 | eggs/ 102 | .eggs/ 103 | lib/ 104 | lib64/ 105 | parts/ 106 | sdist/ 107 | var/ 108 | wheels/ 109 | share/python-wheels/ 110 | *.egg-info/ 111 | .installed.cfg 112 | *.egg 113 | MANIFEST 114 | 115 | # PyInstaller 116 | # Usually these files are written by a python script from a template 117 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 118 | *.manifest 119 | *.spec 120 | 121 | # Installer logs 122 | pip-log.txt 123 | pip-delete-this-directory.txt 124 | 125 | # Unit test / coverage reports 126 | htmlcov/ 127 | .tox/ 128 | .nox/ 129 | .coverage 130 | .coverage.* 131 | .cache 132 | nosetests.xml 133 | coverage.xml 134 | *.cover 135 | *.py,cover 136 | .hypothesis/ 137 | .pytest_cache/ 138 | cover/ 139 | 140 | # Translations 141 | *.mo 142 | *.pot 143 | 144 | # Django stuff: 145 | *.log 146 | local_settings.py 147 | db.sqlite3 148 | db.sqlite3-journal 149 | 150 | # Flask stuff: 151 | instance/ 152 | .webassets-cache 153 | 154 | # Scrapy stuff: 155 | .scrapy 156 | 157 | # Sphinx documentation 158 | docs/_build/ 159 | 160 | # PyBuilder 161 | .pybuilder/ 162 | target/ 163 | 164 | # Jupyter Notebook 165 | .ipynb_checkpoints 166 | 167 | # IPython 168 | profile_default/ 169 | ipython_config.py 170 | 171 | # pyenv 172 | # For a library or package, you might want to ignore these files since the code is 173 | # intended to run in multiple environments; otherwise, check them in: 174 | # .python-version 175 | 176 | # pipenv 177 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
178 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 179 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 180 | # install all needed dependencies. 181 | #Pipfile.lock 182 | 183 | # poetry 184 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 185 | # This is especially recommended for binary packages to ensure reproducibility, and is more 186 | # commonly ignored for libraries. 187 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 188 | #poetry.lock 189 | 190 | # pdm 191 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 192 | #pdm.lock 193 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 194 | # in version control. 195 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 196 | .pdm.toml 197 | .pdm-python 198 | .pdm-build/ 199 | 200 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 201 | __pypackages__/ 202 | 203 | # Celery stuff 204 | celerybeat-schedule 205 | celerybeat.pid 206 | 207 | # SageMath parsed files 208 | *.sage.py 209 | 210 | # Environments 211 | .env 212 | .venv 213 | env/ 214 | venv/ 215 | ENV/ 216 | env.bak/ 217 | venv.bak/ 218 | 219 | # Spyder project settings 220 | .spyderproject 221 | .spyproject 222 | 223 | # Rope project settings 224 | .ropeproject 225 | 226 | # mkdocs documentation 227 | /site 228 | 229 | # mypy 230 | .mypy_cache/ 231 | .dmypy.json 232 | dmypy.json 233 | 234 | # Pyre type checker 235 | .pyre/ 236 | 237 | # pytype static type analyzer 238 | .pytype/ 239 | 240 | # Cython debug symbols 241 | cython_debug/ 242 | 243 | # PyCharm 244 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 245 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 246 | # and can be added to the global gitignore or merged into this file. For a more nuclear 247 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
248 | #.idea/ 249 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/git_toolbox_blame.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /.idea/git_toolbox_prj.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 14 | 15 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | EmbeddedPerformanceJava 13 | 14 | 15 | Error handlingJava 16 | 17 | 18 | Groovy 19 | 20 | 21 | InitializationJava 22 | 23 | 24 | JVM languages 25 | 26 | 27 | Java 28 | 29 | 30 | Java 21Java language level migration aidsJava 31 | 32 | 33 | Java language level migration aidsJava 34 | 35 | 36 | Kotlin 37 | 38 | 39 | LoggingJVM languages 40 | 41 | 42 | MemoryJava 43 | 44 | 45 | PerformanceJava 46 | 47 | 48 | Probable bugsJava 49 | 50 | 51 | Python 52 | 53 | 54 | Redundant constructsKotlin 55 | 56 | 57 | RegExp 58 | 59 | 60 | Style issuesKotlin 61 | 62 | 63 | Threading issuesGroovy 64 | 65 | 66 | Threading issuesJava 67 | 68 | 69 | Verbose or redundant code constructsJava 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/opendbt.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Python_tests_in_tests.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 17 | -------------------------------------------------------------------------------- /.idea/runConfigurations/pip_install.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /.idea/runConfigurations/pylint.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 25 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | disable= 3 | C, # convention 4 | W, # warnings 5 | import-error, 6 | no-name-in-module, 7 | too-many-arguments, 8 | too-many-positional-arguments, 9 | too-few-public-methods, 10 | no-member, 11 | unexpected-keyword-arg, 12 | R0801 # Similar lines in 2 files 13 | -------------------------------------------------------------------------------- 
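The CI workflow above checks the package against this configuration with `python -m pylint opendbt`. An equivalent programmatic invocation, as a minimal sketch assuming pylint is installed and the working directory is the repository root:

```python
from pylint.lint import Run

# Lint the opendbt package against the repository's .pylintrc;
# exit=False returns a results object instead of calling sys.exit().
results = Run(["--rcfile=.pylintrc", "opendbt"], exit=False)
print(results.linter.stats.global_note)  # pylint's overall 0-10 score
```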
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. 
Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![logo-badge](https://github.com/mac-s-g/github-help-wanted/blob/master/src/images/logo-full.png?raw=true)
2 | [![License](http://img.shields.io/:license-apache%202.0-brightgreen.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)
3 | ![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)
4 | 
5 | # opendbt
6 | 
7 | This project adds new capabilities to dbt-core by dynamically extending dbt's source code.
8 | 
9 | dbt is a popular solution for batch data processing in data analytics. However, it operates on
10 | an [open-core model](https://opencoreventures.com/blog/2023-07-open-core-is-misunderstood-business-model/), which can
11 | sometimes keep community features out of the open-source version. opendbt addresses this concern with
12 | a fully open-source package. **opendbt builds upon dbt-core, adding valuable
13 | features without changing dbt-core code.**
14 | 
15 | With `opendbt` you can go beyond the core functionalities of dbt, for example by seamlessly integrating your customized
16 | adapter and extending the Jinja context with additional adapter/Python methods.
17 | 
18 | ## Features
19 | 
20 | - :white_check_mark: Includes a superior [dbt catalog UI](https://memiiso.github.io/opendbt/opendbtdocs/): a
21 | user-friendly data catalog
22 | with column-level
23 | lineage, [see it here](https://memiiso.github.io/opendbt/opendbtdocs/)
24 | - :white_check_mark: Integrates Python and DLT jobs into dbt, enabling Extract & Load (EL) with dbt.
25 | - :white_check_mark: Supports dbt Mesh setups: running multiple projects that use cross-project ref
26 | models.
27 | - :white_check_mark: And many more features and customization options:
28 | - Customize existing adapters: add your custom logic to current adapters
29 | - By extending a current adapter, provide more functions to Jinja
30 | - Execute local Python
31 | code: [run local Python code](https://medium.com/@ismail-simsek/make-dbt-great-again-ec34f3b661f5). For example, you
32 | could import data from web APIs directly within your dbt model.
33 | - [Integrate DLT](https://github.com/memiiso/opendbt/issues/40):
Run end-to-end ETL pipelines with dbt and DLT.
34 | - Use a multi-project dbt-mesh setup with [cross-project references](https://docs.getdbt.com/docs/collaborate/govern/project-dependencies#how-to-write-cross-project-ref).
35 | - Until now, this feature was only available in "dbt Cloud Enterprise".
36 | - Granular model-level orchestration with Airflow: integrate Airflow for fine-grained control over model execution.
37 | - Serve dbt docs in the Airflow UI: create a custom page on the Airflow server that displays dbt documentation as an
38 | Airflow
39 | UI page.
40 | - Register [dbt callbacks](https://docs.getdbt.com/reference/programmatic-invocations#registering-callbacks) within a
41 | dbt project to trigger custom actions or alerting based on selected dbt events.
42 | 
43 | [See the documentation for further details and examples](https://memiiso.github.io/opendbt/).
44 | 
45 | ![opendbt-airflow-ui.png](https://raw.githubusercontent.com/memiiso/opendbt/main/docs/assets/opendbt-airflow-ui.png)
46 | 
47 | ## Installation
48 | 
49 | Install from GitHub or PyPI:
50 | 
51 | ```shell
52 | pip install opendbt==0.13.0
53 | # Or
54 | pip install https://github.com/memiiso/opendbt/archive/refs/tags/0.4.0.zip --upgrade --user
55 | ```
56 | 
57 | ## **Your Contributions Matter**
58 | 
59 | The project is completely open-source, using the Apache 2.0 license.
60 | opendbt is still a young project and there are things to improve.
61 | Please feel free to test it, give feedback, open feature requests, or send pull requests.
62 | 
63 | ### Contributors
64 | 
65 | 
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-docs-link.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-docs-link.png
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-docs-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-docs-page.png
--------------------------------------------------------------------------------
/docs/assets/airflow-dbt-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/airflow-dbt-flow.png
--------------------------------------------------------------------------------
/docs/assets/dbt-custom-adapter-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/dbt-custom-adapter-python.png
--------------------------------------------------------------------------------
/docs/assets/dbt-local-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/dbt-local-python.png
--------------------------------------------------------------------------------
/docs/assets/docs-columns-transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-columns-transformation.png
--------------------------------------------------------------------------------
/docs/assets/docs-dependencies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-dependencies.png
--------------------------------------------------------------------------------
/docs/assets/docs-lineage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-lineage.png
--------------------------------------------------------------------------------
/docs/assets/docs-run-info-error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-run-info-error.png
--------------------------------------------------------------------------------
/docs/assets/docs-run-info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/docs-run-info.png
--------------------------------------------------------------------------------
/docs/assets/opendbt-airflow-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/docs/assets/opendbt-airflow-ui.png
--------------------------------------------------------------------------------
/docs/catalog.md:
--------------------------------------------------------------------------------
1 | # Opendbt Catalog
2 | 
3 | [See it in action](https://memiiso.github.io/opendbt/opendbtdocs/)
4 | 
5 | Summary of the catalog files:
6 | 
7 | - [catalog.json](catalog.json): Generated by dbt
8 | - [catalogl.json](catalogl.json): Generated by opendbt; contains extended catalog information with column-level lineage
9 | - [manifest.json](manifest.json): Generated by dbt
10 | - [run_info.json](run_info.json): Generated by opendbt; contains the latest run information per object/model
11 | 
12 | ## Key Features
13 | 
14 | ### Up-to-date run information
15 | 
16 | ![docs-run-info.png](assets/docs-run-info.png)
17 | 
18 | ### Run information with error messages
19 | 
20 | ![docs-run-info-error.png](assets/docs-run-info-error.png)
21 | 
22 | ### Model dependencies, including tests
23 | 
24 | ![docs-dependencies.png](assets/docs-dependencies.png)
25 | 
26 | ### Column-level dependency lineage and transformations
27 | 
28 | ![docs-columns-transformation.png](assets/docs-columns-transformation.png)
29 | 
30 | ### Dependency lineage
31 | 
32 | ![docs-lineage.png](assets/docs-lineage.png)
--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | 
3 | ## Using dbt with User-Defined Adapters and Jinja Methods
4 | 
5 | To add custom methods to an existing adapter and expose them to Jinja templates, follow these steps:
6 | 
7 | **Step-1:** Extend the adapter.
8 | Create a new adapter class that inherits from the desired base adapter and add the necessary methods to it.
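A minimal sketch of such a class, assuming dbt-duckdb as the base adapter and using dbt's `@available` decorator to expose the new method to Jinja (the class and method names mirror the project's example; the method body here is illustrative):

```python
from dbt.adapters.base import available
from dbt.adapters.duckdb import DuckDBAdapter


class DuckDBAdapterV2Custom(DuckDBAdapter):

    @available  # exposes the method to Jinja as adapter.submit_local_python_job(...)
    def submit_local_python_job(self, parsed_model: dict, compiled_code: str):
        """Run a model's compiled Python code in a local subprocess."""
        import subprocess
        import sys
        import tempfile

        # Write the compiled code to a temporary file and execute it with the
        # current interpreter; check=True raises if the subprocess fails.
        with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as tmp:
            tmp.write(compiled_code)
        subprocess.run([sys.executable, tmp.name], check=True)
```

The permalink below points to the project's actual example implementation: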
9 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/examples.py#L10-L26
10 | 
11 | **Step-2:** In your `dbt_project.yml` file, set the `dbt_custom_adapter` variable to the fully qualified name of your
12 | custom adapter class. This enables opendbt to recognize and activate your adapter.
13 | ```yml
14 | vars:
15 |   dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom
16 | ```
17 | 
18 | **Step-3:** Execute dbt commands as usual. dbt will now load and use your custom adapter class, allowing you to
19 | access the newly defined methods within your Jinja macros.
20 | ```python
21 | from opendbt import OpenDbtProject
22 | 
23 | dp = OpenDbtProject(project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir")
24 | dp.run(command="run")
25 | ```
26 | 
27 | ## Executing Python Models Locally with dbt
28 | 
29 | By leveraging a customized adapter and a custom materialization, dbt can be extended to execute Python code locally.
30 | This capability is particularly useful for scenarios involving data ingestion from external APIs, enabling
31 | seamless integration within the dbt framework.
32 | 
33 | **Step-1:** We'll extend an existing adapter (like `DuckDBAdapter`) to add a new method, `submit_local_python_job`. This
34 | method executes the provided Python code as a subprocess.
35 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/examples.py#L10-L26
36 | 
37 | **Step-2:** Create a new materialization named `executepython`. This materialization calls the newly added
38 | `submit_local_python_job` method of the custom adapter to execute the compiled Python code.
39 | 
40 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/opendbt/macros/executepython.sql#L1-L26
41 | 
42 | **Step-3:** Let's create a sample Python model that dbt will execute locally using the `executepython`
43 | materialization.
44 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/dbttest/models/my_executepython_dbt_model.py#L1-L22
45 | 
46 | ## Orchestrating dbt Models with Airflow
47 | 
48 | **Step-1:** Let's create an Airflow DAG to orchestrate the execution of your dbt project (a minimal DAG skeleton is sketched after the snippets below).
49 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/airflow/dags/dbt_workflow.py#L17-L32
50 | 
51 | ![airflow-dbt-flow.png](assets%2Fairflow-dbt-flow.png)
52 | 
53 | #### Creating an Airflow DAG that selectively executes a specific subset of models from your dbt project
54 | 
55 | ```python
56 | from opendbt.airflow import OpenDbtAirflowProject
57 | 
58 | # create dbt build tasks for models with given tag
59 | p = OpenDbtAirflowProject(resource_type='model', project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir",
60 |                           target='dev', tag="MY_TAG")
61 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
62 | ```
63 | 
64 | #### Creating a DAG to run dbt tests
65 | 
66 | ```python
67 | from opendbt.airflow import OpenDbtAirflowProject
68 | 
69 | # create dbt test tasks with given model tag
70 | p = OpenDbtAirflowProject(resource_type='test', project_dir="/dbt/project_dir", profiles_dir="/dbt/profiles_dir",
71 |                           target='dev', tag="MY_TAG")
72 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
73 | ```
74 | 
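Both snippets assume a surrounding DAG that defines the `dag`, `start`, and `end` objects. A minimal skeleton, assuming Airflow 2.x (the DAG id, schedule, and start date are illustrative):

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator

from opendbt.airflow import OpenDbtAirflowProject

with DAG(dag_id="dbt_tag_workflow", start_date=datetime(2024, 1, 1),
         schedule=None, catchup=False) as dag:
    # anchor tasks that the generated dbt tasks are wired between
    start = EmptyOperator(task_id="start")
    end = EmptyOperator(task_id="end")

    # create dbt build tasks for models with the given tag
    p = OpenDbtAirflowProject(resource_type='model', project_dir="/dbt/project_dir",
                              profiles_dir="/dbt/profiles_dir", target='dev', tag="MY_TAG")
    p.load_dbt_tasks(dag=dag, start_node=start, end_node=end)
```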
75 | ## Integrating dbt Documentation into Airflow
76 | 
77 | Airflow, a powerful workflow orchestration tool, can be leveraged to streamline not only dbt execution but also dbt
78 | documentation access. By integrating dbt documentation into your Airflow interface, you can centralize your data
79 | engineering resources and improve team collaboration.
80 | 
81 | Here is how:
82 | **Step-1:** Create a plugin file. Navigate to your Airflow `{airflow}/plugins` directory,
83 | create a new Python file, and name it appropriately, such as `dbt_docs_plugin.py`. Add the following code to the
84 | `dbt_docs_plugin.py` file.
85 | Ensure that the specified path accurately points to the folder where your dbt project generates its documentation.
86 | https://github.com/memiiso/opendbt/blob/a5a7a598a3e4f04e184b38257578279473d78cfc/tests/resources/airflow/plugins/airflow_dbtdocs_page.py#L1-L6
87 | 
88 | **Step-2:** Restart Airflow to activate the plugin. Once the restart is complete, you should see a new link labeled
89 | `DBT Docs` within your Airflow web interface. This link provides access to your dbt documentation.
90 | ![airflow-dbt-docs-link.png](assets%2Fairflow-dbt-docs-link.png)
91 | 
92 | **Step-3:** Click on the `DBT Docs` link to open your dbt documentation.
93 | ![airflow-dbt-docs-page.png](assets%2Fairflow-dbt-docs-page.png)
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | --8<-- "README.md"
--------------------------------------------------------------------------------
/docs/opendbtdocs/catalog.json:
--------------------------------------------------------------------------------
1 | {
2 |   "metadata": {
3 |     "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json",
4 |     "dbt_version": "1.9.4",
5 |     "generated_at": "2025-05-15T18:01:15.469569Z",
6 |     "invocation_id": "4a81f777-5b15-45b3-8c88-b08d0f7bb9ea",
7 |     "env": {}
8 |   },
9 |   "nodes": {
10 |     "model.dbtcore.my_core_table1": {
11 |       "metadata": {
12 |         "type": "BASE TABLE",
13 |         "schema": "core",
14 |         "name": "my_core_table1",
15 |         "database": "dev",
16 |         "comment": null,
17 |         "owner": null
18 |       },
19 |       "columns": {
20 |         "id": {
21 |           "type": "INTEGER",
22 |           "index": 1,
23 |           "name": "id",
24 |           "comment": null
25 |         },
26 |         "row_data": {
27 |           "type": "VARCHAR",
28 |           "index": 2,
29 |           "name": "row_data",
30 |           "comment": null
31 |         }
32 |       },
33 |       "stats": {
34 |         "has_stats": {
35 |           "id": "has_stats",
36 |           "label": "Has Stats?",
37 |           "value": false,
38 |           "include": false,
39 |           "description": "Indicates whether there are statistics for this table"
40 |         }
41 |       },
42 |       "unique_id": "model.dbtcore.my_core_table1"
43 |     },
44 |     "model.dbtcore.my_executedlt_model": {
45 |       "metadata": {
46 |         "type": "BASE TABLE",
47 |         "schema": "core",
48 |         "name": "my_executedlt_model",
49 |         "database": "dev",
50 |         "comment": null,
51 |         "owner": null
52 |       },
53 |       "columns": {
54 |         "event_tstamp": {
55 |           "type": "TIMESTAMP WITH TIME ZONE",
56 |           "index": 1,
57 |           "name": "event_tstamp",
58 |           "comment": null
59 |         },
60 |         "event_id": {
61 |           "type": "BIGINT",
62 |           "index": 2,
63 |           "name": "event_id",
64 |           "comment": null
65 |         },
66 |         "_dlt_load_id": {
67 |           "type": "VARCHAR",
68 |           "index": 3,
69 |           "name": "_dlt_load_id",
70 |           "comment": null
71 |         },
72 |         "_dlt_id": {
73 |           "type": "VARCHAR",
74 |           "index": 4,
75 |           "name": "_dlt_id",
76 |           "comment": null
77 |         },
78 |         "event_tstamp__v_text": {
79 |           "type": "VARCHAR",
80 |           "index": 5,
81 |           "name": "event_tstamp__v_text",
82 |           "comment": null
83 |         }
84 |       },
85 |       "stats": {
86 |         "has_stats": {
87 |           "id": "has_stats",
88 |           "label": "Has Stats?",
89 |           "value": false,
90 |           "include": false,
91 | 
"description": "Indicates whether there are statistics for this table" 92 | } 93 | }, 94 | "unique_id": "model.dbtcore.my_executedlt_model" 95 | }, 96 | "model.dbtcore.my_first_dbt_model": { 97 | "metadata": { 98 | "type": "BASE TABLE", 99 | "schema": "core", 100 | "name": "my_first_dbt_model", 101 | "database": "dev", 102 | "comment": null, 103 | "owner": null 104 | }, 105 | "columns": { 106 | "id": { 107 | "type": "INTEGER", 108 | "index": 1, 109 | "name": "id", 110 | "comment": null 111 | }, 112 | "data_value": { 113 | "type": "VARCHAR", 114 | "index": 2, 115 | "name": "data_value", 116 | "comment": null 117 | }, 118 | "column_3": { 119 | "type": "VARCHAR", 120 | "index": 3, 121 | "name": "column_3", 122 | "comment": null 123 | } 124 | }, 125 | "stats": { 126 | "has_stats": { 127 | "id": "has_stats", 128 | "label": "Has Stats?", 129 | "value": false, 130 | "include": false, 131 | "description": "Indicates whether there are statistics for this table" 132 | } 133 | }, 134 | "unique_id": "model.dbtcore.my_first_dbt_model" 135 | }, 136 | "model.dbtcore.my_second_dbt_model": { 137 | "metadata": { 138 | "type": "BASE TABLE", 139 | "schema": "core", 140 | "name": "my_second_dbt_model", 141 | "database": "dev", 142 | "comment": null, 143 | "owner": null 144 | }, 145 | "columns": { 146 | "pk_id": { 147 | "type": "INTEGER", 148 | "index": 1, 149 | "name": "pk_id", 150 | "comment": null 151 | }, 152 | "data_value1": { 153 | "type": "VARCHAR", 154 | "index": 2, 155 | "name": "data_value1", 156 | "comment": null 157 | }, 158 | "data_value2": { 159 | "type": "VARCHAR", 160 | "index": 3, 161 | "name": "data_value2", 162 | "comment": null 163 | }, 164 | "event_tstamp": { 165 | "type": "TIMESTAMP WITH TIME ZONE", 166 | "index": 4, 167 | "name": "event_tstamp", 168 | "comment": null 169 | } 170 | }, 171 | "stats": { 172 | "has_stats": { 173 | "id": "has_stats", 174 | "label": "Has Stats?", 175 | "value": false, 176 | "include": false, 177 | "description": "Indicates whether there are statistics for this table" 178 | } 179 | }, 180 | "unique_id": "model.dbtcore.my_second_dbt_model" 181 | } 182 | }, 183 | "sources": {}, 184 | "errors": null 185 | } -------------------------------------------------------------------------------- /docs/opendbtdocs/catalogl.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", 4 | "dbt_version": "1.9.4", 5 | "generated_at": "2025-05-15T18:01:15.469569Z", 6 | "invocation_id": "4a81f777-5b15-45b3-8c88-b08d0f7bb9ea", 7 | "env": {} 8 | }, 9 | "nodes": { 10 | "model.dbtfinance.my_cross_project_ref_model": { 11 | "stats": {}, 12 | "columns": { 13 | "id": { 14 | "name": "id", 15 | "type": "unknown", 16 | "column_fqn": "dev.finance.my_cross_project_ref_model.id", 17 | "table_fqn": "dev.finance.my_cross_project_ref_model", 18 | "table_relative_fqn": "finance.my_cross_project_ref_model", 19 | "transformations": [], 20 | "depends_on": [] 21 | }, 22 | "row_data": { 23 | "name": "row_data", 24 | "type": "unknown", 25 | "column_fqn": "dev.finance.my_cross_project_ref_model.row_data", 26 | "table_fqn": "dev.finance.my_cross_project_ref_model", 27 | "table_relative_fqn": "finance.my_cross_project_ref_model", 28 | "transformations": [], 29 | "depends_on": [] 30 | }, 31 | "num_rows": { 32 | "name": "num_rows", 33 | "type": "unknown", 34 | "column_fqn": "dev.finance.my_cross_project_ref_model.num_rows", 35 | "table_fqn": "dev.finance.my_cross_project_ref_model", 
36 | "table_relative_fqn": "finance.my_cross_project_ref_model", 37 | "transformations": [], 38 | "depends_on": [] 39 | } 40 | }, 41 | "metadata": {} 42 | }, 43 | "model.dbtcore.my_first_dbt_model": { 44 | "stats": {}, 45 | "columns": { 46 | "data_value": { 47 | "name": "data_value", 48 | "description": "", 49 | "meta": {}, 50 | "data_type": null, 51 | "constraints": [], 52 | "quote": null, 53 | "tags": [], 54 | "granularity": null, 55 | "type": "unknown", 56 | "column_fqn": "dev.core.my_first_dbt_model.data_value", 57 | "table_fqn": "dev.core.my_first_dbt_model", 58 | "table_relative_fqn": "core.my_first_dbt_model", 59 | "transformations": [ 60 | "'test-value' AS data_value", 61 | "'test-value' AS data_value", 62 | "'test-value' AS data_value", 63 | "'test-value' AS data_value", 64 | "source_data.data_value AS data_value" 65 | ], 66 | "depends_on": [] 67 | }, 68 | "column_3": { 69 | "name": "column_3", 70 | "description": "", 71 | "meta": {}, 72 | "data_type": null, 73 | "constraints": [], 74 | "quote": null, 75 | "tags": [], 76 | "granularity": null, 77 | "type": "unknown", 78 | "column_fqn": "dev.core.my_first_dbt_model.column_3", 79 | "table_fqn": "dev.core.my_first_dbt_model", 80 | "table_relative_fqn": "core.my_first_dbt_model", 81 | "transformations": [ 82 | "'test-value' AS column_3", 83 | "'test-value' AS column_3", 84 | "'test-value' AS column_3", 85 | "'test-value' AS column_3", 86 | "source_data.column_3 AS column_3" 87 | ], 88 | "depends_on": [] 89 | }, 90 | "id": { 91 | "name": "id", 92 | "description": "The **primary key** for this table", 93 | "meta": {}, 94 | "data_type": null, 95 | "constraints": [], 96 | "quote": null, 97 | "tags": [], 98 | "granularity": null, 99 | "type": "unknown", 100 | "column_fqn": "dev.core.my_first_dbt_model.id", 101 | "table_fqn": "dev.core.my_first_dbt_model", 102 | "table_relative_fqn": "core.my_first_dbt_model", 103 | "transformations": [ 104 | "NULL AS id", 105 | "2 AS id", 106 | "1 AS id", 107 | "1 AS id", 108 | "source_data.id AS id" 109 | ], 110 | "depends_on": [] 111 | } 112 | }, 113 | "metadata": {} 114 | }, 115 | "model.dbtcore.my_executesql_dbt_model": { 116 | "stats": {}, 117 | "columns": {}, 118 | "metadata": {} 119 | }, 120 | "model.dbtcore.my_failing_dbt_model": { 121 | "stats": {}, 122 | "columns": { 123 | "my_failing_column": { 124 | "name": "my_failing_column", 125 | "type": "unknown", 126 | "column_fqn": "dev.core.my_failing_dbt_model.my_failing_column", 127 | "table_fqn": "dev.core.my_failing_dbt_model", 128 | "table_relative_fqn": "core.my_failing_dbt_model", 129 | "transformations": [], 130 | "depends_on": [] 131 | } 132 | }, 133 | "metadata": {} 134 | }, 135 | "model.dbtcore.my_core_table1": { 136 | "stats": {}, 137 | "columns": { 138 | "id": { 139 | "name": "id", 140 | "description": "", 141 | "meta": {}, 142 | "data_type": null, 143 | "constraints": [], 144 | "quote": null, 145 | "tags": [], 146 | "granularity": null, 147 | "type": "unknown", 148 | "column_fqn": "dev.core.my_core_table1.id", 149 | "table_fqn": "dev.core.my_core_table1", 150 | "table_relative_fqn": "core.my_core_table1", 151 | "transformations": [ 152 | "2 AS id", 153 | "1 AS id", 154 | "source_data.id AS id" 155 | ], 156 | "depends_on": [] 157 | }, 158 | "row_data": { 159 | "name": "row_data", 160 | "description": "", 161 | "meta": {}, 162 | "data_type": null, 163 | "constraints": [], 164 | "quote": null, 165 | "tags": [], 166 | "granularity": null, 167 | "type": "unknown", 168 | "column_fqn": "dev.core.my_core_table1.row_data", 169 | "table_fqn": 
"dev.core.my_core_table1", 170 | "table_relative_fqn": "core.my_core_table1", 171 | "transformations": [ 172 | "'row1' AS row_data", 173 | "'row1' AS row_data", 174 | "source_data.row_data AS row_data" 175 | ], 176 | "depends_on": [] 177 | } 178 | }, 179 | "metadata": {} 180 | }, 181 | "model.dbtcore.my_second_dbt_model": { 182 | "stats": {}, 183 | "columns": { 184 | "pk_id": { 185 | "name": "pk_id", 186 | "description": "The primary key for this table", 187 | "meta": {}, 188 | "data_type": null, 189 | "constraints": [], 190 | "quote": null, 191 | "tags": [], 192 | "granularity": null, 193 | "type": "unknown", 194 | "column_fqn": "dev.core.my_second_dbt_model.pk_id", 195 | "table_fqn": "dev.core.my_second_dbt_model", 196 | "table_relative_fqn": "core.my_second_dbt_model", 197 | "transformations": [ 198 | "dev.core.my_first_dbt_model AS t1", 199 | "t1.id AS pk_id" 200 | ], 201 | "depends_on": [ 202 | { 203 | "name": "id", 204 | "type": "unknown", 205 | "column_fqn": "dev.core.my_first_dbt_model.id", 206 | "table_fqn": "dev.core.my_first_dbt_model", 207 | "table_relative_fqn": "core.my_first_dbt_model", 208 | "transformations": [], 209 | "depends_on": [], 210 | "model_id": "model.dbtcore.my_first_dbt_model" 211 | } 212 | ] 213 | }, 214 | "data_value1": { 215 | "name": "data_value1", 216 | "description": "", 217 | "meta": {}, 218 | "data_type": null, 219 | "constraints": [], 220 | "quote": null, 221 | "tags": [], 222 | "granularity": null, 223 | "type": "unknown", 224 | "column_fqn": "dev.core.my_second_dbt_model.data_value1", 225 | "table_fqn": "dev.core.my_second_dbt_model", 226 | "table_relative_fqn": "core.my_second_dbt_model", 227 | "transformations": [ 228 | "dev.core.my_first_dbt_model AS t1", 229 | "t1.data_value AS data_value1" 230 | ], 231 | "depends_on": [ 232 | { 233 | "name": "data_value", 234 | "type": "unknown", 235 | "column_fqn": "dev.core.my_first_dbt_model.data_value", 236 | "table_fqn": "dev.core.my_first_dbt_model", 237 | "table_relative_fqn": "core.my_first_dbt_model", 238 | "transformations": [], 239 | "depends_on": [], 240 | "model_id": "model.dbtcore.my_first_dbt_model" 241 | } 242 | ] 243 | }, 244 | "data_value2": { 245 | "name": "data_value2", 246 | "description": "", 247 | "meta": {}, 248 | "data_type": null, 249 | "constraints": [], 250 | "quote": null, 251 | "tags": [], 252 | "granularity": null, 253 | "type": "unknown", 254 | "column_fqn": "dev.core.my_second_dbt_model.data_value2", 255 | "table_fqn": "dev.core.my_second_dbt_model", 256 | "table_relative_fqn": "core.my_second_dbt_model", 257 | "transformations": [ 258 | "dev.core.my_core_table1 AS t2", 259 | "dev.core.my_first_dbt_model AS t1", 260 | "dev.core.my_first_dbt_model AS t1", 261 | "CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2" 262 | ], 263 | "depends_on": [ 264 | { 265 | "name": "data_value", 266 | "type": "unknown", 267 | "column_fqn": "dev.core.my_first_dbt_model.data_value", 268 | "table_fqn": "dev.core.my_first_dbt_model", 269 | "table_relative_fqn": "core.my_first_dbt_model", 270 | "transformations": [], 271 | "depends_on": [], 272 | "model_id": "model.dbtcore.my_first_dbt_model" 273 | }, 274 | { 275 | "name": "column_3", 276 | "type": "unknown", 277 | "column_fqn": "dev.core.my_first_dbt_model.column_3", 278 | "table_fqn": "dev.core.my_first_dbt_model", 279 | "table_relative_fqn": "core.my_first_dbt_model", 280 | "transformations": [], 281 | "depends_on": [], 282 | "model_id": "model.dbtcore.my_first_dbt_model" 283 | }, 284 | { 285 | "name": "row_data", 286 | 
"type": "unknown", 287 | "column_fqn": "dev.core.my_core_table1.row_data", 288 | "table_fqn": "dev.core.my_core_table1", 289 | "table_relative_fqn": "core.my_core_table1", 290 | "transformations": [], 291 | "depends_on": [], 292 | "model_id": "model.dbtcore.my_core_table1" 293 | } 294 | ] 295 | }, 296 | "event_tstamp": { 297 | "name": "event_tstamp", 298 | "description": "", 299 | "meta": {}, 300 | "data_type": null, 301 | "constraints": [], 302 | "quote": null, 303 | "tags": [], 304 | "granularity": null, 305 | "type": "unknown", 306 | "column_fqn": "dev.core.my_second_dbt_model.event_tstamp", 307 | "table_fqn": "dev.core.my_second_dbt_model", 308 | "table_relative_fqn": "core.my_second_dbt_model", 309 | "transformations": [ 310 | "dev.core.my_executedlt_model AS t3", 311 | "t3.event_tstamp AS event_tstamp" 312 | ], 313 | "depends_on": [ 314 | { 315 | "name": "event_tstamp", 316 | "type": "unknown", 317 | "column_fqn": "dev.core.my_executedlt_model.event_tstamp", 318 | "table_fqn": "dev.core.my_executedlt_model", 319 | "table_relative_fqn": "core.my_executedlt_model", 320 | "transformations": [], 321 | "depends_on": [], 322 | "model_id": "model.dbtcore.my_executedlt_model" 323 | } 324 | ] 325 | } 326 | }, 327 | "metadata": {} 328 | }, 329 | "model.dbtcore.my_executepython_dlt_model": { 330 | "stats": {}, 331 | "columns": {}, 332 | "metadata": {} 333 | }, 334 | "model.dbtcore.my_executedlt_model": { 335 | "stats": {}, 336 | "columns": { 337 | "event_id": { 338 | "name": "event_id", 339 | "description": "", 340 | "meta": {}, 341 | "data_type": null, 342 | "constraints": [], 343 | "quote": null, 344 | "tags": [], 345 | "granularity": null, 346 | "type": "unknown", 347 | "column_fqn": "dev.core.my_executedlt_model.event_id", 348 | "table_fqn": "dev.core.my_executedlt_model", 349 | "table_relative_fqn": "core.my_executedlt_model", 350 | "transformations": [], 351 | "depends_on": [] 352 | }, 353 | "event_tstamp": { 354 | "name": "event_tstamp", 355 | "description": "", 356 | "meta": {}, 357 | "data_type": null, 358 | "constraints": [], 359 | "quote": null, 360 | "tags": [], 361 | "granularity": null, 362 | "type": "unknown", 363 | "column_fqn": "dev.core.my_executedlt_model.event_tstamp", 364 | "table_fqn": "dev.core.my_executedlt_model", 365 | "table_relative_fqn": "core.my_executedlt_model", 366 | "transformations": [], 367 | "depends_on": [] 368 | } 369 | }, 370 | "metadata": {} 371 | }, 372 | "model.dbtcore.my_executepython_model": { 373 | "stats": {}, 374 | "columns": { 375 | "event_id": { 376 | "name": "event_id", 377 | "description": "", 378 | "meta": {}, 379 | "data_type": null, 380 | "constraints": [], 381 | "quote": null, 382 | "tags": [], 383 | "granularity": null, 384 | "type": "unknown", 385 | "column_fqn": "dev.core.my_executepython_model.event_id", 386 | "table_fqn": "dev.core.my_executepython_model", 387 | "table_relative_fqn": "core.my_executepython_model", 388 | "transformations": [], 389 | "depends_on": [] 390 | }, 391 | "event_tstamp": { 392 | "name": "event_tstamp", 393 | "description": "", 394 | "meta": {}, 395 | "data_type": null, 396 | "constraints": [], 397 | "quote": null, 398 | "tags": [], 399 | "granularity": null, 400 | "type": "unknown", 401 | "column_fqn": "dev.core.my_executepython_model.event_tstamp", 402 | "table_fqn": "dev.core.my_executepython_model", 403 | "table_relative_fqn": "core.my_executepython_model", 404 | "transformations": [], 405 | "depends_on": [] 406 | } 407 | }, 408 | "metadata": {} 409 | }, 410 | 
"source.dbtfinance.core.my_executepython_model": { 411 | "stats": {}, 412 | "columns": {}, 413 | "metadata": {} 414 | }, 415 | "source.dbtfinance.core.my_executepython_dlt_model": { 416 | "stats": {}, 417 | "columns": {}, 418 | "metadata": {} 419 | } 420 | }, 421 | "sources": {}, 422 | "errors": null 423 | } -------------------------------------------------------------------------------- /docs/opendbtdocs/run_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/run-results/v6.json", 4 | "dbt_version": "1.9.4", 5 | "generated_at": "2025-05-15T18:01:12.604966Z", 6 | "invocation_id": "03a957b9-3b54-4612-b157-2c69961dbcf9", 7 | "env": {} 8 | }, 9 | "elapsed_time": 11.362421989440918, 10 | "args": { 11 | "warn_error_options": { 12 | "include": [], 13 | "exclude": [] 14 | }, 15 | "show": false, 16 | "which": "build", 17 | "state_modified_compare_vars": false, 18 | "export_saved_queries": false, 19 | "include_saved_query": false, 20 | "use_colors": true, 21 | "version_check": true, 22 | "log_format_file": "debug", 23 | "require_explicit_package_overrides_for_builtin_materializations": true, 24 | "log_level": "info", 25 | "source_freshness_run_project_hooks": false, 26 | "populate_cache": true, 27 | "defer": false, 28 | "select": [], 29 | "require_batched_execution_for_custom_microbatch_strategy": false, 30 | "print": true, 31 | "state_modified_compare_more_unrendered_values": false, 32 | "strict_mode": false, 33 | "require_yaml_configuration_for_mf_time_spines": false, 34 | "send_anonymous_usage_stats": true, 35 | "log_file_max_bytes": 10485760, 36 | "exclude": [], 37 | "resource_types": [], 38 | "log_format": "default", 39 | "partial_parse_file_diff": true, 40 | "write_json": true, 41 | "invocation_command": "dbt test_dbt_docs.py::TestDbtDocs::test_run_docs_generate", 42 | "quiet": false, 43 | "target": "dev", 44 | "vars": {}, 45 | "favor_state": false, 46 | "log_path": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore/logs", 47 | "macro_debugging": false, 48 | "exclude_resource_types": [], 49 | "require_nested_cumulative_type_params": false, 50 | "require_resource_names_without_spaces": false, 51 | "static_parser": true, 52 | "show_resource_report": false, 53 | "printer_width": 80, 54 | "introspect": true, 55 | "cache_selected_only": false, 56 | "log_level_file": "debug", 57 | "skip_nodes_if_on_run_start_fails": false, 58 | "profiles_dir": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore", 59 | "project_dir": "/Users/simseki/IdeaProjects/opendbt/tests/resources/dbtcore", 60 | "use_colors_file": true, 61 | "empty": false, 62 | "partial_parse": true, 63 | "indirect_selection": "eager" 64 | }, 65 | "nodes": { 66 | "model.dbtcore.my_core_table1": { 67 | "run_status": "success", 68 | "run_completed_at": "2025-05-15 18:01:01", 69 | "run_message": "OK", 70 | "run_failures": null, 71 | "run_adapter_response": { 72 | "_message": "OK" 73 | } 74 | }, 75 | "model.dbtcore.my_executedlt_model": { 76 | "run_status": "success", 77 | "run_completed_at": "2025-05-15 18:01:09", 78 | "run_message": "Executed DLT pipeline", 79 | "run_failures": null, 80 | "run_adapter_response": { 81 | "_message": "Executed DLT pipeline", 82 | "code": "import dlt\nfrom dlt.pipeline import TPipeline\n\n\n@dlt.resource(\n columns={\"event_tstamp\": {\"data_type\": \"timestamp\", \"precision\": 3}},\n primary_key=\"event_id\",\n)\ndef events():\n yield [{\"event_id\": 1, \"event_tstamp\": 
\"2024-07-30T10:00:00.123\"},\n {\"event_id\": 2, \"event_tstamp\": \"2025-02-30T10:00:00.321\"}]\n\n\ndef model(dbt, pipeline: TPipeline):\n \"\"\"\n\n :param dbt:\n :param pipeline: Pre-configured dlt pipeline. dlt target connection and dataset is pre-set using the model config!\n :return:\n \"\"\"\n dbt.config(materialized=\"executedlt\")\n print(\"========================================================\")\n print(f\"INFO: DLT Pipeline pipeline_name:{pipeline.pipeline_name}\")\n print(f\"INFO: DLT Pipeline dataset_name:{pipeline.dataset_name}\")\n print(f\"INFO: DLT Pipeline dataset_name:{pipeline}\")\n print(f\"INFO: DLT Pipeline staging:{pipeline.staging}\")\n print(f\"INFO: DLT Pipeline destination:{pipeline.destination}\")\n print(f\"INFO: DLT Pipeline _pipeline_storage:{pipeline._pipeline_storage}\")\n print(f\"INFO: DLT Pipeline _schema_storage:{pipeline._schema_storage}\")\n print(f\"INFO: DLT Pipeline state:{pipeline.state}\")\n print(f\"INFO: DBT this:{dbt.this}\")\n print(\"========================================================\")\n load_info = pipeline.run(events(), table_name=str(str(dbt.this).split('.')[-1]).strip('\"'))\n print(load_info)\n row_counts = pipeline.last_trace.last_normalize_info\n print(row_counts)\n print(\"========================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executedlt_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executedlt_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n", 83 | "rows_affected": -1 84 | } 85 | }, 86 | "model.dbtcore.my_executepython_dlt_model": { 87 | "run_status": "success", 88 | "run_completed_at": "2025-05-15 18:01:12", 89 | "run_message": "Executed Python", 90 | "run_failures": null, 91 | "run_adapter_response": { 92 | "_message": "Executed Python", 93 | "code": "import dlt\n\n\n@dlt.resource(\n columns={\"event_tstamp\": {\"data_type\": \"timestamp\", \"precision\": 3}},\n primary_key=\"event_id\",\n)\ndef events():\n yield [{\"event_id\": 1, \"event_tstamp\": \"2024-07-30T10:00:00.123\"},\n {\"event_id\": 2, \"event_tstamp\": \"2025-02-30T10:00:00.321\"}]\n\n\ndef model(dbt, session):\n dbt.config(materialized=\"executepython\")\n print(\"========================================================\")\n print(f\"INFO: DLT Version:{dlt.version.__version__}\")\n print(f\"INFO: DBT Duckdb Session:{type(session)}\")\n print(f\"INFO: DBT 
Duckdb Connection:{type(session._env.conn)}\")\n print(\"========================================================\")\n p = dlt.pipeline(\n pipeline_name=\"dbt_dlt\",\n destination=dlt.destinations.duckdb(session._env.conn),\n dataset_name=dbt.this.schema,\n dev_mode=False,\n )\n load_info = p.run(events())\n print(load_info)\n row_counts = p.last_trace.last_normalize_info\n print(row_counts)\n print(\"========================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executepython_dlt_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executepython_dlt_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n", 94 | "rows_affected": -1 95 | } 96 | }, 97 | "model.dbtcore.my_executepython_model": { 98 | "run_status": "success", 99 | "run_completed_at": "2025-05-15 18:01:12", 100 | "run_message": "Executed Python", 101 | "run_failures": null, 102 | "run_adapter_response": { 103 | "_message": "Executed Python", 104 | "code": "import os\nimport platform\n\nfrom dbt import version\n\n\ndef print_info():\n _str = f\"name:{os.name}, system:{platform.system()} release:{platform.release()}\"\n _str += f\"\\npython version:{platform.python_version()}, dbt:{version.__version__}\"\n print(_str)\n\n\ndef model(dbt, session):\n dbt.config(materialized=\"executepython\")\n print(\"==================================================\")\n print(\"========IM LOCALLY EXECUTED PYTHON MODEL==========\")\n print(\"==================================================\")\n print_info()\n print(\"==================================================\")\n print(\"===============MAKE DBT GREAT AGAIN===============\")\n print(\"==================================================\")\n return None\n\n\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\ndef ref(*args, **kwargs):\n refs = {}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n\ndef source(*args, dbt_load_df_function):\n sources = {}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n\nconfig_dict = {}\n\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n 
@staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"dev\"\n schema = \"core\"\n identifier = \"my_executepython_model\"\n \n def __repr__(self):\n return '\"dev\".\"core\".\"my_executepython_model\"'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = False\n\n# COMMAND ----------\n\n\n", 105 | "rows_affected": -1 106 | } 107 | }, 108 | "model.dbtcore.my_executesql_dbt_model": { 109 | "run_status": "success", 110 | "run_completed_at": "2025-05-15 18:01:12", 111 | "run_message": "OK", 112 | "run_failures": null, 113 | "run_adapter_response": { 114 | "_message": "OK" 115 | } 116 | }, 117 | "model.dbtcore.my_first_dbt_model": { 118 | "run_status": "success", 119 | "run_completed_at": "2025-05-15 18:01:12", 120 | "run_message": "OK", 121 | "run_failures": null, 122 | "run_adapter_response": { 123 | "_message": "OK" 124 | } 125 | }, 126 | "test.dbtcore.not_null_my_first_dbt_model_id.5fb22c2710": { 127 | "run_status": "warn", 128 | "run_completed_at": "2025-05-15 18:01:12", 129 | "run_message": "Got 1 result, configured to warn if >0", 130 | "run_failures": 1, 131 | "run_adapter_response": { 132 | "_message": "OK" 133 | } 134 | }, 135 | "test.dbtcore.unique_my_first_dbt_model_id.16e066b321": { 136 | "run_status": "warn", 137 | "run_completed_at": "2025-05-15 18:01:12", 138 | "run_message": "Got 1 result, configured to warn if >0", 139 | "run_failures": 1, 140 | "run_adapter_response": { 141 | "_message": "OK" 142 | } 143 | }, 144 | "model.dbtcore.my_failing_dbt_model": { 145 | "run_status": "error", 146 | "run_completed_at": "2025-05-15 18:01:12", 147 | "run_message": "Runtime Error in model my_failing_dbt_model (models/my_failing_dbt_model.sql)\n Binder Error: Referenced column \"non_exists_column\" not found in FROM clause!\n Candidate bindings: \"id\"\n \n LINE 11: select non_exists_column as my_failing_column\n ^", 148 | "run_failures": null, 149 | "run_adapter_response": {} 150 | }, 151 | "model.dbtcore.my_second_dbt_model": { 152 | "run_status": "success", 153 | "run_completed_at": "2025-05-15 18:01:12", 154 | "run_message": "OK", 155 | "run_failures": null, 156 | "run_adapter_response": { 157 | "_message": "OK" 158 | } 159 | }, 160 | "test.dbtcore.not_null_my_second_dbt_model_pk_id.b08c51696a": { 161 | "run_status": "pass", 162 | "run_completed_at": "2025-05-15 18:01:12", 163 | "run_message": null, 164 | "run_failures": 0, 165 | "run_adapter_response": { 166 | "_message": "OK" 167 | } 168 | }, 169 | "test.dbtcore.unique_my_second_dbt_model_pk_id.b8b65b2a4f": { 170 | "run_status": "fail", 171 | "run_completed_at": "2025-05-15 18:01:12", 172 | "run_message": "Got 1 result, configured to fail if != 0", 173 | "run_failures": 1, 174 | "run_adapter_response": { 175 | "_message": "OK" 176 | } 177 | } 178 | } 179 | } -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: OpenDbt 2 | site_url: http://memiiso.github.io/opendbt 3 | repo_url: https://github.com/memiiso/opendbt 4 | theme: 5 | name: material 6 | features: 7 | # - navigation.instant 8 | - navigation.indexes 9 | - navigation.tabs 10 | # - 
navigation.expand 11 | - toc.integrate 12 | - content.code.copy 13 | - content.tabs.link 14 | nav: 15 | - Home: index.md 16 | - Examples: examples.md 17 | - Catalog: catalog.md 18 | - Catalog (Demo): opendbtdocs/index.html 19 | 20 | markdown_extensions: 21 | - pymdownx.highlight: 22 | anchor_linenums: true 23 | line_spans: __span 24 | pygments_lang_class: true 25 | - pymdownx.inlinehilite 26 | - pymdownx.snippets 27 | - pymdownx.superfences 28 | - abbr 29 | - pymdownx.snippets: 30 | base_path: [ !relative $config_dir ] 31 | check_paths: true 32 | - attr_list 33 | - pymdownx.emoji: 34 | emoji_index: !!python/name:material.extensions.emoji.twemoji 35 | emoji_generator: !!python/name:material.extensions.emoji.to_svg -------------------------------------------------------------------------------- /opendbt/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from typing import List, Callable, Optional 4 | 5 | # IMPORTANT! this will import the overrides, and activates the patches 6 | from opendbt.dbt import * 7 | from opendbt.logger import OpenDbtLogger 8 | from opendbt.utils import Utils 9 | 10 | 11 | class OpenDbtCli: 12 | def __init__(self, project_dir: Path, profiles_dir: Optional[Path] = None, callbacks: Optional[List[Callable]] = None): 13 | self.project_dir: Path = Path(get_nearest_project_dir(project_dir.as_posix())) 14 | self.profiles_dir: Optional[Path] = profiles_dir if profiles_dir else default_profiles_dir() 15 | self._project: Optional[PartialProject] = None 16 | self._user_callbacks: List[Callable] = callbacks if callbacks else [] 17 | self._project_callbacks: List[Callable] = [] 18 | 19 | @property 20 | def project(self) -> PartialProject: 21 | if not self._project: 22 | self._project = PartialProject.from_project_root(project_root=self.project_dir.as_posix(), 23 | verify_version=True) 24 | 25 | return self._project 26 | 27 | @property 28 | def project_dict(self) -> dict: 29 | return self.project.project_dict 30 | 31 | @property 32 | def project_vars(self) -> dict: 33 | """ 34 | :return: dict: Variables defined in the `dbt_project.yml` file, `vars`. 35 | Note: 36 | This method only retrieves global project variables specified within the `dbt_project.yml` file. 37 | Variables passed via command-line arguments are not included in the returned dictionary. 38 | """ 39 | return self.project_dict.get('vars', {}) 40 | 41 | 42 | @property 43 | def project_callbacks(self) -> List[Callable]: 44 | if not self._project_callbacks: 45 | self._project_callbacks = list(self._user_callbacks) 46 | dbt_callbacks_str = self.project_vars.get('dbt_callbacks', "") 47 | dbt_callbacks_list = [c for c in dbt_callbacks_str.split(',') if c.strip()] 48 | for callback_module_name in dbt_callbacks_list: 49 | callback_func = Utils.import_module_attribute_by_name(callback_module_name.strip()) 50 | self._project_callbacks.append(callback_func) 51 | 52 | return self._project_callbacks 53 | 54 | def invoke(self, args: List[str], callbacks: Optional[List[Callable]] = None) -> dbtRunnerResult: 55 | """ 56 | Run dbt with the given arguments. 57 | 58 | :param args: The arguments to pass to dbt. 59 | :param callbacks: 60 | :return: The result of the dbt run. 
61 | """ 62 | run_callbacks = self.project_callbacks + (callbacks if callbacks else self.project_callbacks) 63 | run_args = args or [] 64 | if "--project-dir" not in run_args: 65 | run_args += ["--project-dir", self.project_dir.as_posix()] 66 | if "--profiles-dir" not in run_args and self.profiles_dir: 67 | run_args += ["--profiles-dir", self.profiles_dir.as_posix()] 68 | return self.run(args=run_args, callbacks=run_callbacks) 69 | 70 | @staticmethod 71 | def run(args: List[str], callbacks: Optional[List[Callable]] = None) -> dbtRunnerResult: 72 | """ 73 | Run dbt with the given arguments. 74 | 75 | :param callbacks: 76 | :param args: The arguments to pass to dbt. 77 | :return: The result of the dbt run. 78 | """ 79 | callbacks = callbacks if callbacks else [] 80 | # https://docs.getdbt.com/reference/programmatic-invocations 81 | runner = DbtCliRunner(callbacks=callbacks) 82 | result: dbtRunnerResult = runner.invoke(args) 83 | 84 | if result.success: 85 | return result 86 | 87 | if result.exception: 88 | raise result.exception 89 | 90 | # take error message and raise it as exception 91 | err_messages = [res.message for res in result.result if isinstance(res, RunResult) and res.status == 'error'] 92 | 93 | if err_messages: 94 | raise DbtRuntimeError(msg="\n".join(err_messages)) 95 | 96 | raise DbtRuntimeError(msg=f"DBT execution failed!") 97 | 98 | def manifest(self, partial_parse: bool = True, no_write_manifest: bool = True) -> Manifest: 99 | args = ["parse"] 100 | if partial_parse: 101 | args.append("--partial-parse") 102 | if no_write_manifest: 103 | args.append("--no-write-json") 104 | 105 | result = self.invoke(args=args) 106 | if not result.success: 107 | raise Exception(f"DBT execution failed. result:{result}") 108 | if isinstance(result.result, Manifest): 109 | return result.result 110 | 111 | raise Exception(f"DBT execution did not return Manifest object. returned:{type(result.result)}") 112 | 113 | def generate_docs(self, args: Optional[List[str]] = None): 114 | _args = ["docs", "generate"] + (args if args else []) 115 | self.invoke(args=_args) 116 | 117 | 118 | class OpenDbtProject(OpenDbtLogger): 119 | """ 120 | This class is used to take action on a dbt project. 
121 | """ 122 | 123 | DEFAULT_TARGET = 'dev' # development 124 | 125 | def __init__(self, project_dir: Path, target: Optional[str] = None, profiles_dir: Optional[Path] = None, args: Optional[List[str]] = None, callbacks: Optional[List[Callable]] = None): 126 | super().__init__() 127 | self.project_dir: Path = project_dir 128 | self.profiles_dir: Optional[Path] = profiles_dir 129 | self.target: str = target if target else self.DEFAULT_TARGET 130 | self.args: List[str] = args if args else [] 131 | self.cli: OpenDbtCli = OpenDbtCli(project_dir=self.project_dir, profiles_dir=self.profiles_dir, callbacks=callbacks) 132 | 133 | @property 134 | def project(self) -> PartialProject: 135 | return self.cli.project 136 | 137 | @property 138 | def project_dict(self) -> dict: 139 | return self.cli.project_dict 140 | 141 | @property 142 | def project_vars(self) -> dict: 143 | return self.cli.project_vars 144 | 145 | def run(self, command: str = "build", target: Optional[str] = None, args: Optional[List[str]] = None, use_subprocess: bool = False, 146 | write_json: bool = False) -> Optional[dbtRunnerResult]: 147 | run_args = args if args else [] 148 | run_args.extend(["--target", target if target else self.target]) 149 | run_args.extend(["--project-dir", self.project_dir.as_posix()]) 150 | if self.profiles_dir: 151 | run_args.extend(["--profiles-dir", self.profiles_dir.as_posix()]) 152 | run_args = [command] + run_args + self.args 153 | if write_json: 154 | run_args.remove("--no-write-json") 155 | 156 | if use_subprocess: 157 | shell = False 158 | self.log.info(f"Working dir: {os.getcwd()}") 159 | py_executable = sys.executable if sys.executable else 'python' 160 | self.log.info(f"Python executable: {py_executable}") 161 | __command = [py_executable, '-m', 'opendbt'] + run_args 162 | self.log.info(f"Running command (shell={shell}) `{' '.join(__command)}`") 163 | Utils.runcommand(command=__command) 164 | return None 165 | 166 | self.log.info(f"Running `dbt {' '.join(run_args)}`") 167 | return self.cli.invoke(args=run_args) 168 | 169 | def manifest(self, partial_parse: bool = True, no_write_manifest: bool = True) -> Manifest: 170 | return self.cli.manifest(partial_parse=partial_parse, no_write_manifest=no_write_manifest) 171 | 172 | def generate_docs(self, args: Optional[List[str]] = None): 173 | return self.cli.generate_docs(args=args) 174 | -------------------------------------------------------------------------------- /opendbt/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from opendbt import OpenDbtCli, default_project_dir, default_profiles_dir 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser(description="OpenDBT CLI") 9 | parser.add_argument( 10 | "--project-dir", 11 | default=None, 12 | help="Path to the dbt project directory. Defaults to the DBT_PROJECT_DIR environment variable or the current working directory.", 13 | ) 14 | parser.add_argument( 15 | "--profiles-dir", 16 | default=None, 17 | help="Path to the dbt profiles directory. 
-------------------------------------------------------------------------------- /opendbt/__main__.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | 
4 | from opendbt import OpenDbtCli, default_project_dir, default_profiles_dir
5 | 
6 | 
7 | def main():
8 |     parser = argparse.ArgumentParser(description="OpenDBT CLI")
9 |     parser.add_argument(
10 |         "--project-dir",
11 |         default=None,
12 |         help="Path to the dbt project directory. Defaults to the DBT_PROJECT_DIR environment variable or the current working directory.",
13 |     )
14 |     parser.add_argument(
15 |         "--profiles-dir",
16 |         default=None,
17 |         help="Path to the dbt profiles directory. Defaults to the DBT_PROFILES_DIR environment variable.",
18 |     )
19 |     ns, args = parser.parse_known_args()
20 |     project_dir = Path(ns.project_dir) if ns.project_dir else default_project_dir()
21 |     profiles_dir = Path(ns.profiles_dir) if ns.profiles_dir else default_profiles_dir()
22 | 
23 |     OpenDbtCli(project_dir=project_dir, profiles_dir=profiles_dir).invoke(args=args)
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     main()
28 | 
-------------------------------------------------------------------------------- /opendbt/airflow/__init__.py: --------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from pathlib import Path
3 | from typing import Tuple
4 | 
5 | from airflow import DAG
6 | from airflow.models.baseoperator import BaseOperator
7 | from airflow.operators.empty import EmptyOperator
8 | 
9 | import opendbt
10 | 
11 | 
12 | class OpenDbtExecutorOperator(BaseOperator):
13 |     """
14 |     An Airflow operator for executing dbt commands.
15 |     """
16 | 
17 |     def __init__(self,
18 |                  project_dir: Path,
19 |                  command: str,
20 |                  target: str = None,
21 |                  profiles_dir: Path = None,
22 |                  select: str = None,
23 |                  args: list = None,
24 |                  # without using a subprocess, airflow randomly deadlocks
25 |                  use_subprocess: bool = True,
26 |                  execution_timeout=timedelta(minutes=60), **kwargs) -> None:
27 |         super().__init__(execution_timeout=execution_timeout, **kwargs)
28 | 
29 |         self.project_dir: Path = project_dir
30 |         self.command = command
31 |         self.profiles_dir: Path = profiles_dir
32 |         self.target = target
33 |         self.use_subprocess = use_subprocess
34 |         self.args = args if args else []
35 | 
36 |         if select:
37 |             self.args += ["--select", select]
38 | 
39 |         # use a separate colour for test executions and other executions
40 |         if self.command == "test":
41 |             self.ui_color = "#1CB1C2"
42 |         else:
43 |             self.ui_color = "#0084ff"
44 | 
45 |     def execute(self, context):
46 |         """
47 |         Execute the dbt command.
48 |         """
49 |         runner = opendbt.OpenDbtProject(project_dir=self.project_dir,
50 |                                         profiles_dir=self.profiles_dir,
51 |                                         target=self.target)
52 |         runner.run(command=self.command, args=self.args, use_subprocess=self.use_subprocess)
53 | 
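A sketch of using the operator inside a DAG (the DAG id, schedule, and path are hypothetical):

    from datetime import datetime
    from pathlib import Path

    from airflow import DAG
    from opendbt.airflow import OpenDbtExecutorOperator

    with DAG(dag_id="dbt_workflow", start_date=datetime(2025, 1, 1), schedule_interval=None) as dag:
        dbt_build = OpenDbtExecutorOperator(
            task_id="dbt-build",
            project_dir=Path("/opt/dbt/dbtcore"),  # hypothetical project location
            command="build",
        )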
79 | """ 80 | 81 | start_node = start_node if start_node else EmptyOperator(task_id='dbt-%s-start' % self.project_dir.name, 82 | dag=dag) 83 | end_node = end_node if end_node else EmptyOperator(task_id='dbt-%s-end' % self.project_dir.name, dag=dag) 84 | 85 | if include_dbt_seeds: 86 | # add dbt seeds job after start node abd before all other dbt jobs 87 | first_node = start_node 88 | start_node = OpenDbtExecutorOperator(dag=dag, 89 | task_id="dbt-seeds", 90 | project_dir=self.project_dir, 91 | profiles_dir=self.profiles_dir, 92 | target=self.target, 93 | command="seed" 94 | ) 95 | start_node.set_upstream(first_node) 96 | 97 | manifest = self.manifest() 98 | dbt_tasks = {} 99 | # create all the jobs. granular as one job per model/table 100 | for key, node in manifest.nodes.items(): 101 | if tag and tag not in node.tags: 102 | self.log.debug( 103 | f"Skipping node:{node.name} because it dont have desired desired-tag={tag} node-tags={node.tags}") 104 | # LOG DEBUG OR TRACE here print(f" tag:{tag} NOT in {node.tags} SKIPP {node.name}") 105 | continue # skip if the node don't have the desired tag 106 | 107 | if resource_type == "test" and not str(node.name).startswith("source_"): 108 | if node.resource_type == "test": 109 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag, 110 | task_id=node.unique_id, 111 | project_dir=self.project_dir, 112 | profiles_dir=self.profiles_dir, 113 | target=self.target, 114 | command="test", 115 | select=node.name 116 | ) 117 | if node.resource_type == "model": 118 | dbt_tasks[node.unique_id] = EmptyOperator(dag=dag, task_id=node.unique_id) 119 | 120 | if node.resource_type == "model" and resource_type in ["all", "model"]: 121 | # NOTE `build` command also runs the tests that's why are skipping tests for models below 122 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag, 123 | task_id=node.unique_id, 124 | project_dir=self.project_dir, 125 | profiles_dir=self.profiles_dir, 126 | target=self.target, 127 | command="build", 128 | select=node.name 129 | ) 130 | 131 | if node.resource_type == "test" and str(node.name).startswith("source_") and resource_type in ["all", 132 | "test"]: 133 | # we are skipping model tests because they are included above with model execution( `build` command) 134 | # source table tests 135 | dbt_tasks[node.unique_id] = OpenDbtExecutorOperator(dag=dag, 136 | task_id=node.unique_id, 137 | project_dir=self.project_dir, 138 | profiles_dir=self.profiles_dir, 139 | target=self.target, 140 | command="test", 141 | select=node.name 142 | ) 143 | 144 | # set upstream dependencies using dbt dependencies 145 | for key, node in manifest.nodes.items(): 146 | if tag and tag not in node.tags: 147 | continue # skip if the node don't have the desired tag 148 | if node.unique_id in dbt_tasks: # node.resource_type == "model" or True or 149 | task = dbt_tasks[node.unique_id] 150 | if node.depends_on_nodes: 151 | for upstream_id in node.depends_on_nodes: 152 | if upstream_id in dbt_tasks: 153 | self.log.debug(f"Setting upstream of {task.task_id} -> {upstream_id}") 154 | task.set_upstream(dbt_tasks[upstream_id]) 155 | 156 | singular_tests = None 157 | if include_singular_tests: 158 | singular_tests = OpenDbtExecutorOperator(dag=dag, 159 | task_id=f"{self.project_dir.name}_singular_tests", 160 | project_dir=self.project_dir, 161 | profiles_dir=self.profiles_dir, 162 | target=self.target, 163 | command="test", 164 | select="test_type:singular" 165 | ) 166 | for k, task in dbt_tasks.items(): 167 | if not task.downstream_task_ids: 168 | # set 
-------------------------------------------------------------------------------- /opendbt/airflow/plugin.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | 
4 | # pylint: disable=inconsistent-return-statements
5 | def init_plugins_dbtdocs_page(dbt_docs_dir: Path):
6 |     from airflow.plugins_manager import AirflowPlugin
7 |     from flask import Blueprint
8 |     from flask_appbuilder import BaseView, expose
9 |     from flask import abort
10 |     from airflow.www.auth import has_access
11 |     from airflow.security import permissions
12 | 
13 |     class DBTDocsView(BaseView):
14 |         route_base = "/dbt"
15 |         default_view = "dbt_docs_index"
16 | 
17 |         @expose("/dbt_docs_index.html")  # type: ignore[misc]
18 |         @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
19 |         def dbt_docs_index(self):
20 |             if not dbt_docs_dir.joinpath("index.html").is_file():
21 |                 abort(404)
22 |             else:
23 |                 return dbt_docs_dir.joinpath("index.html").read_text()
24 |             # return self.render_template("index.html", content="")
25 | 
26 |         def return_json(self, json_file: str):
27 |             if not dbt_docs_dir.joinpath(json_file).is_file():
28 |                 abort(404)
29 |             else:
30 |                 data = dbt_docs_dir.joinpath(json_file).read_text()
31 |                 return data, 200, {"Content-Type": "application/json"}
32 | 
33 |         @expose("/catalog.json")  # type: ignore[misc]
34 |         @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
35 |         def catalog(self):
36 |             return self.return_json("catalog.json")
37 | 
38 |         @expose("/manifest.json")  # type: ignore[misc]
39 |         @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
40 |         def manifest(self):
41 |             return self.return_json("manifest.json")
42 | 
43 |         @expose("/run_info.json")  # type: ignore[misc]
44 |         @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
45 |         def run_info(self):
46 |             return self.return_json("run_info.json")
47 | 
48 |         @expose("/catalogl.json")  # type: ignore[misc]
49 |         @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
50 |         def catalogl(self):
51 |             return self.return_json("catalogl.json")
52 | 
53 | 
54 |     # Create a flask blueprint to integrate the templates and static folder
55 |     bp = Blueprint(
56 |         "DBT Plugin",
57 |         __name__,
58 |         template_folder=dbt_docs_dir.as_posix(),
59 |         static_folder=dbt_docs_dir.as_posix(),
60 |         # static_url_path='/dbtdocsview'
61 |     )
62 | 
63 |     class AirflowDbtDocsPlugin(AirflowPlugin):
64 |         name = "DBT Docs Plugin"
65 |         flask_blueprints = [bp]
66 |         appbuilder_views = [{"name": "DBT Docs", "category": "", "view": DBTDocsView()}]
67 | 
68 |     return AirflowDbtDocsPlugin
69 | 
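A sketch of registering the docs page from an Airflow plugins module (the docs directory below is hypothetical and must contain the generated dbt docs artifacts; `tests/resources/airflow/plugins/airflow_dbtdocs_page.py` plays this role in the test setup):

    # plugins/airflow_dbtdocs_page.py
    from pathlib import Path

    from opendbt.airflow import plugin

    # Airflow's plugin manager discovers the returned AirflowPlugin subclass at module level.
    AirflowDbtDocsPlugin = plugin.init_plugins_dbtdocs_page(Path("/opt/dbt/docs"))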
-------------------------------------------------------------------------------- /opendbt/dbt/__init__.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | from dbt import version
4 | from packaging.version import Version
5 | 
6 | from opendbt.runtime_patcher import RuntimePatcher
7 | 
8 | OPENDBT_INDEX_HTML_FILE = Path(__file__).parent.joinpath('docs/index.html')
9 | 
10 | # IMPORTANT! `opendbt.dbt` import needs to happen before any `dbt` import
11 | dbt_version = Version(version.get_installed_version().to_version_string(skip_matcher=True))
12 | if Version("1.7.0") <= dbt_version < Version("1.8.0"):
13 |     RuntimePatcher(module_name="dbt.include.global_project").patch_attribute(attribute_name="DOCS_INDEX_FILE_PATH",
14 |                                                                              new_value=OPENDBT_INDEX_HTML_FILE)
15 |     from opendbt.dbt.v17.adapters.factory import OpenDbtAdapterContainer
16 |     from opendbt.dbt.v17.task.docs.generate import OpenDbtGenerateTask
17 |     from opendbt.dbt.v17.config.runtime import OpenDbtRuntimeConfig
18 |     from opendbt.dbt.v17.task.run import OpenDbtModelRunner
19 | elif Version("1.8.0") <= dbt_version < Version("1.10.0"):
20 |     RuntimePatcher(module_name="dbt.task.docs").patch_attribute(attribute_name="DOCS_INDEX_FILE_PATH",
21 |                                                                 new_value=OPENDBT_INDEX_HTML_FILE)
22 |     from opendbt.dbt.v18.adapters.factory import OpenDbtAdapterContainer
23 |     from opendbt.dbt.v18.task.docs.generate import OpenDbtGenerateTask
24 |     from opendbt.dbt.v18.config.runtime import OpenDbtRuntimeConfig
25 |     from opendbt.dbt.v18.task.run import OpenDbtModelRunner
26 |     from opendbt.dbt.v18.artifacts.schemas.run import OpenDbtRunResultsArtifact
27 | else:
28 |     raise Exception(
29 |         f"Unsupported dbt version {dbt_version}, please make sure the dbt version is supported/integrated by opendbt")
30 | 
31 | RuntimePatcher(module_name="dbt.adapters.factory").patch_attribute(attribute_name="FACTORY",
32 |                                                                    new_value=OpenDbtAdapterContainer())
33 | # shared code patches
34 | from opendbt.dbt.shared.cli.main import sqlfluff
35 | from opendbt.dbt.shared.cli.main import sqlfluff_lint
36 | from opendbt.dbt.shared.cli.main import sqlfluff_fix
37 | from opendbt.dbt.shared.adapters.impl import OpenDbtBaseAdapter
38 | 
39 | # dbt imports
40 | from dbt.cli.main import dbtRunner as DbtCliRunner
41 | from dbt.cli.main import dbtRunnerResult
42 | from dbt.cli.resolvers import default_profiles_dir, default_project_dir
43 | from dbt.config import PartialProject
44 | from dbt.contracts.graph.manifest import Manifest
45 | from dbt.contracts.results import RunResult
46 | from dbt.exceptions import DbtRuntimeError
47 | from dbt.task.base import get_nearest_project_dir
48 | 
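Because these patches are applied at import time, import order matters in user code; a minimal sketch:

    import opendbt  # noqa: F401  -- must come first: activates the version-specific dbt patches
    from dbt.cli.main import dbtRunner  # dbt modules can be imported safely afterwards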
-------------------------------------------------------------------------------- /opendbt/dbt/docs/.gitignore: --------------------------------------------------------------------------------
1 | *.json
2 | catalog/
-------------------------------------------------------------------------------- /opendbt/dbt/shared/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/shared/adapters/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/adapters/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/shared/adapters/impl.py: --------------------------------------------------------------------------------
1 | import importlib
2 | import sys
3 | import tempfile
4 | from typing import Dict
5 | 
6 | from dbt.adapters.base import available, BaseAdapter
7 | 
8 | from opendbt.runtime_patcher import PatchClass
9 | 
10 | 
11 | @PatchClass(module_name="dbt.adapters.base", target_name="BaseAdapter")
12 | class OpenDbtBaseAdapter(BaseAdapter):
13 | 
14 |     def _execute_python_model(self, model_name: str, compiled_code: str, **kwargs):
15 |         try:
16 |             with tempfile.NamedTemporaryFile(suffix='.py', delete=True) as model_file:
17 |                 try:
18 |                     model_file.write(compiled_code.lstrip().encode('utf-8'))
19 |                     model_file.flush()
20 |                     print(f"Created temp py file {model_file.name}")
21 |                     # Load the module spec
22 |                     spec = importlib.util.spec_from_file_location(model_name, model_file.name)
23 |                     # Create a module object
24 |                     module = importlib.util.module_from_spec(spec)
25 |                     # Load the module
26 |                     sys.modules[model_name] = module
27 |                     spec.loader.exec_module(module)
28 |                     dbt_obj = module.dbtObj(None)
29 |                     # Access and call the `model` function of the model!
30 |                     # IMPORTANT: here we are passing down the duckdb session from the adapter to the model
31 |                     module.model(dbt=dbt_obj, **kwargs)
32 |                 except Exception as e:
33 |                     raise Exception(
34 |                         f"Failed to load or execute python model:{model_name} from file {model_file.name}") from e
35 |                 finally:
36 |                     model_file.close()
37 |         except Exception as e:
38 |             raise Exception(f"Failed to create temp py file for model:{model_name}") from e
39 | 
40 |     @available
41 |     def submit_local_python_job(self, parsed_model: Dict, compiled_code: str):
42 |         connection = self.connections.get_if_exists()
43 |         if not connection:
44 |             connection = self.connections.get_thread_connection()
45 |         self._execute_python_model(model_name=parsed_model['name'], compiled_code=compiled_code,
46 |                                    session=connection.handle)
47 | 
48 |     @available
49 |     def submit_local_dlt_job(self, parsed_model: Dict, compiled_code: str):
50 |         connection = self.connections.get_if_exists()
51 |         if not connection:
52 |             connection = self.connections.get_thread_connection()
53 | 
54 |         import dlt
55 |         # IMPORTANT: here we are pre-configuring and preparing the dlt.pipeline for the model!
56 |         _pipeline = dlt.pipeline(
57 |             pipeline_name=str(parsed_model['unique_id']).replace(".", "-"),
58 |             destination=dlt.destinations.duckdb(connection.handle._env.conn),
59 |             dataset_name=parsed_model['schema'],
60 |             dev_mode=False,
61 |         )
62 |         self._execute_python_model(model_name=parsed_model['name'], compiled_code=compiled_code, pipeline=_pipeline)
63 | 
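A sketch of a model that targets the `executepython` materialization (the model file name is hypothetical; `session` is the adapter connection handle that `submit_local_python_job` passes down):

    # models/my_local_python_model.py
    def model(dbt, session):
        dbt.config(materialized="executepython")
        print(f"Running {dbt.this} locally using session: {type(session)}")
        return None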
-------------------------------------------------------------------------------- /opendbt/dbt/shared/cli/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/cli/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/shared/cli/main.py: --------------------------------------------------------------------------------
1 | import click
2 | from dbt.cli import requires, params as p
3 | from dbt.cli.main import global_flags, cli
4 | 
5 | from opendbt.dbt.shared.task.sqlfluff import SqlFluffTasks
6 | from opendbt.runtime_patcher import PatchFunction
7 | 
8 | 
9 | # dbt sqlfluff
10 | @cli.group()
11 | @click.pass_context
12 | @global_flags
13 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff")
14 | def sqlfluff(ctx, **kwargs):
15 |     """Lint or fix the SQL files of your project using sqlfluff"""
16 | 
17 | 
18 | # dbt sqlfluff lint
19 | @sqlfluff.command("lint")
20 | @click.pass_context
21 | @global_flags
22 | @p.defer
23 | @p.deprecated_defer
24 | @p.exclude
25 | @p.favor_state
26 | @p.deprecated_favor_state
27 | @p.full_refresh
28 | @p.indirect_selection
29 | @p.profile
30 | @p.profiles_dir
31 | @p.project_dir
32 | @p.resource_type
33 | @p.select
34 | @p.selector
35 | @p.show
36 | @p.state
37 | @p.defer_state
38 | @p.deprecated_state
39 | @p.store_failures
40 | @p.target
41 | @p.target_path
42 | @p.threads
43 | @p.vars
44 | @requires.postflight
45 | @requires.preflight
46 | @requires.profile
47 | @requires.project
48 | @requires.runtime_config
49 | @requires.manifest(write=False)
50 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_lint")
51 | def sqlfluff_lint(ctx, **kwargs):
52 |     """Lint the SQL files of your project using sqlfluff"""
53 |     task = SqlFluffTasks(
54 |         ctx.obj["flags"],
55 |         ctx.obj["runtime_config"],
56 |         ctx.obj["manifest"],
57 |     )
58 | 
59 |     results = task.lint()
60 |     success = task.interpret_results(results)
61 |     return results, success
62 | 
63 | 
64 | # dbt sqlfluff fix
65 | @sqlfluff.command("fix")
66 | @click.pass_context
67 | @global_flags
68 | @p.defer
69 | @p.deprecated_defer
70 | @p.exclude
71 | @p.favor_state
72 | @p.deprecated_favor_state
73 | @p.full_refresh
74 | @p.indirect_selection
75 | @p.profile
76 | @p.profiles_dir
77 | @p.project_dir
78 | @p.resource_type
79 | @p.select
80 | @p.selector
81 | @p.show
82 | @p.state
83 | @p.defer_state
84 | @p.deprecated_state
85 | @p.store_failures
86 | @p.target
87 | @p.target_path
88 | @p.threads
89 | @p.vars
90 | @requires.postflight
91 | @requires.preflight
92 | @requires.profile
93 | @requires.project
94 | @requires.runtime_config
95 | @requires.manifest(write=False)
96 | @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_fix")
97 | def sqlfluff_fix(ctx, **kwargs):
98 |     """Fix sqlfluff linting violations in the SQL files of your project"""
99 |     task = SqlFluffTasks(
100 |         ctx.obj["flags"],
101 |         ctx.obj["runtime_config"],
102 |         ctx.obj["manifest"],
103 |     )
104 | 
105 |     results = task.fix()
106 |     success = task.interpret_results(results)
107 |     return results, success
108 | 
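With these commands patched into the dbt CLI, linting can be invoked like any other dbt command (a sketch; the project path is hypothetical):

    from pathlib import Path
    from opendbt import OpenDbtProject

    dp = OpenDbtProject(project_dir=Path("/path/to/dbtcore"))
    dp.run(command="sqlfluff", args=["lint"])  # roughly: python -m opendbt sqlfluff lint --project-dir ...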
-------------------------------------------------------------------------------- /opendbt/dbt/shared/task/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/shared/task/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/shared/task/sqlfluff.py: --------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from pathlib import Path
4 | from typing import Optional
5 | 
6 | from dbt.config import RuntimeConfig
7 | from dbt.contracts.results import (
8 |     CatalogResults,
9 |     CatalogArtifact, RunExecutionResult,
10 | )
11 | from dbt.task.compile import CompileTask
12 | from sqlfluff.cli import commands
13 | from sqlfluff.core import Linter, FluffConfig
14 | from sqlfluff.core.linter import LintingResult
15 | from sqlfluff_templater_dbt import DbtTemplater
16 | 
17 | 
18 | class SqlFluffTasks(CompileTask):
19 | 
20 |     def __init__(self, args, config, manifest):
21 |         super().__init__(args, config, manifest)
22 | 
23 |         self.sqlfluff_config = FluffConfig.from_path(path=self.config.project_root)
24 | 
25 |         templater_obj = self.sqlfluff_config._configs["core"]["templater_obj"]
26 |         if isinstance(templater_obj, DbtTemplater):
27 |             templater_obj: DbtTemplater
28 |             self.config: RuntimeConfig
29 |             templater_obj.project_root = self.config.project_root
30 |             templater_obj.working_dir = self.config.project_root
31 |         self.linter = Linter(self.sqlfluff_config)
32 | 
33 |     def get_result(self, elapsed_time: float, violations: list, num_violations: int):
34 |         run_result = RunExecutionResult(
35 |             results=[],
36 |             elapsed_time=elapsed_time,
37 |             generated_at=datetime.now(),
38 |             # args=dbt.utils.args_to_dict(self.args),
39 |             args={},
40 |         )
41 |         result = CatalogArtifact.from_results(
42 |             nodes={},
43 |             sources={},
44 |             generated_at=datetime.now(),
45 |             errors=violations if violations else None,
46 |             compile_results=run_result,
47 |         )
48 |         if num_violations > 0:
49 |             result.exception = Exception(f"Linting found {num_violations} errors!")
50 | 
51 |         return result
52 | 
53 |     def lint(self) -> CatalogArtifact:
54 |         os.chdir(self.config.project_root)
55 |         lint_result: LintingResult = self.linter.lint_paths(paths=(self.config.project_root,))
56 |         result = self.get_result(lint_result.total_time, lint_result.get_violations(), lint_result.num_violations())
57 |         if lint_result.num_violations() > 0:
58 |             print(f"Linting found {lint_result.num_violations()} errors!")
59 |             for error in lint_result.as_records():
60 |                 filepath = Path(error['filepath'])
61 |                 violations: list = error['violations']
62 |                 if violations:
63 |                     print(f"File: {filepath.relative_to(self.config.project_root)}")
64 |                     for violation in violations:
65 |                         print(f"  {violation}")
66 |         return result
67 | 
68 |     def fix(self) -> CatalogArtifact:
69 |         os.chdir(self.config.project_root)
70 |         lnt, formatter = commands.get_linter_and_formatter(cfg=self.sqlfluff_config)
71 |         lint_result: LintingResult = lnt.lint_paths(
72 |             paths=(self.config.project_root,),
73 |             fix=True,
74 |             apply_fixes=True
75 |         )
76 |         result = self.get_result(lint_result.total_time, [], 0)
77 |         return result
78 | 
79 |     @classmethod
80 |     def interpret_results(cls, results: Optional[CatalogResults]) -> bool:
81 |         if results is None:
82 |             return False
83 |         if hasattr(results, "errors") and results.errors:
84 |             return False
85 |         return True
86 | 
-------------------------------------------------------------------------------- /opendbt/dbt/v17/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/v17/adapters/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/adapters/__init__.py
-------------------------------------------------------------------------------- /opendbt/dbt/v17/adapters/factory.py: --------------------------------------------------------------------------------
1 | import importlib
2 | from importlib import import_module
3 | 
4 | from dbt.adapters import factory
5 | from dbt.adapters.factory import Adapter
6 | from dbt.events.functions import fire_event
7 | from dbt.events.types import AdapterRegistered
8 | from dbt.semver import VersionSpecifier
9 | 
10 | from opendbt.runtime_patcher import PatchClass
11 | 
12 | 
13 | @PatchClass(module_name="dbt.adapters.factory", target_name="AdapterContainer")
14 | class OpenDbtAdapterContainer(factory.AdapterContainer):
15 |     DBT_CUSTOM_ADAPTER_VAR = 'dbt_custom_adapter'
16 | 
17 |     def register_adapter(self, config: 'AdapterRequiredConfig') -> None:
18 |         # ==== CUSTOM CODE ====
19 |         # ==== END CUSTOM CODE ====
20 |         adapter_name = config.credentials.type
21 |         adapter_type = self.get_adapter_class_by_name(adapter_name)
22 |         adapter_version = import_module(f".{adapter_name}.__version__", "dbt.adapters").version
23 |         # ==== CUSTOM CODE ====
24 |         custom_adapter_class_name: str = self.get_custom_adapter_config_value(config)
25 |         if custom_adapter_class_name and custom_adapter_class_name.strip():
26 |             # OVERRIDE DEFAULT ADAPTER BY USER GIVEN ADAPTER CLASS
27 |             adapter_type = self.get_custom_adapter_class_by_name(custom_adapter_class_name)
28 |         # ==== END CUSTOM CODE ====
29 |         adapter_version_specifier = VersionSpecifier.from_version_string(
30 |             adapter_version
31 |         ).to_version_string()
32 |         fire_event(
33 |             AdapterRegistered(adapter_name=adapter_name, adapter_version=adapter_version_specifier)
34 |         )
35 |         with self.lock:
36 |             if adapter_name in self.adapters:
37 |                 # this shouldn't really happen... 
38 | return 39 | 40 | adapter: Adapter = adapter_type(config) # type: ignore 41 | self.adapters[adapter_name] = adapter 42 | 43 | def get_custom_adapter_config_value(self, config: 'AdapterRequiredConfig') -> str: 44 | # FIRST: it's set as cli value: dbt run --vars {'dbt_custom_adapter': 'custom_adapters.DuckDBAdapterV1Custom'} 45 | if hasattr(config, 'cli_vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.cli_vars: 46 | custom_adapter_class_name: str = config.cli_vars[self.DBT_CUSTOM_ADAPTER_VAR] 47 | if custom_adapter_class_name and custom_adapter_class_name.strip(): 48 | return custom_adapter_class_name 49 | # SECOND: it's set inside dbt_project.yml 50 | if hasattr(config, 'vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.vars.to_dict(): 51 | custom_adapter_class_name: str = config.vars.to_dict()[self.DBT_CUSTOM_ADAPTER_VAR] 52 | if custom_adapter_class_name and custom_adapter_class_name.strip(): 53 | return custom_adapter_class_name 54 | 55 | return None 56 | 57 | def get_custom_adapter_class_by_name(self, custom_adapter_class_name: str): 58 | if "." not in custom_adapter_class_name: 59 | raise ValueError(f"Unexpected adapter class name: `{custom_adapter_class_name}` ," 60 | f"Expecting something like:`my.sample.library.MyAdapterClass`") 61 | 62 | __module, __class = custom_adapter_class_name.rsplit('.', 1) 63 | try: 64 | user_adapter_module = importlib.import_module(__module) 65 | user_adapter_class = getattr(user_adapter_module, __class) 66 | return user_adapter_class 67 | except ModuleNotFoundError as mnfe: 68 | raise Exception(f"Module of provided adapter not found, provided: {custom_adapter_class_name}") from mnfe 69 | -------------------------------------------------------------------------------- /opendbt/dbt/v17/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/config/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v17/config/runtime.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Mapping 4 | 5 | from dbt.config import RuntimeConfig 6 | from dbt.config.project import path_exists, _load_yaml 7 | from dbt.constants import DEPENDENCIES_FILE_NAME 8 | from dbt.exceptions import DbtProjectError, NonUniquePackageNameError 9 | from typing_extensions import override 10 | 11 | from opendbt.runtime_patcher import PatchClass 12 | 13 | def load_yml_dict(file_path): 14 | ret = {} 15 | if path_exists(file_path): 16 | ret = _load_yaml(file_path) or {} 17 | return ret 18 | 19 | # pylint: disable=too-many-ancestors 20 | @dataclass 21 | @PatchClass(module_name="dbt.config", target_name="RuntimeConfig") 22 | @PatchClass(module_name="dbt.cli.requires", target_name="RuntimeConfig") 23 | class OpenDbtRuntimeConfig(RuntimeConfig): 24 | def load_dependence_projects(self): 25 | dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}") 26 | 27 | if "projects" not in dependencies_yml_dict: 28 | return 29 | 30 | projects = dependencies_yml_dict["projects"] 31 | project_root_parent = Path(self.project_root).parent 32 | for project in projects: 33 | path = project_root_parent.joinpath(project['name']) 34 | try: 35 | project = self.new_project(str(path.as_posix())) 36 | except DbtProjectError as e: 37 | raise DbtProjectError( 38 | f"Failed to read 
depending project: {e} \n project path:{path.as_posix()}", 39 | result_type="invalid_project", 40 | path=path, 41 | ) from e 42 | 43 | yield project.project_name, project 44 | 45 | @override 46 | def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]: 47 | # if self.dependencies is None: 48 | 49 | if self.dependencies is None: 50 | # this sets self.dependencies variable! 51 | self.dependencies = super().load_dependencies(base_only=base_only) 52 | 53 | # additionally load `projects` defined in `dependencies.yml` 54 | for project_name, project in self.load_dependence_projects(): 55 | if project_name in self.dependencies: 56 | raise NonUniquePackageNameError(project_name) 57 | self.dependencies[project_name] = project 58 | 59 | return self.dependencies 60 | -------------------------------------------------------------------------------- /opendbt/dbt/v17/task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/task/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v17/task/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v17/task/docs/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v17/task/docs/generate.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import click 5 | from dbt.task.generate import GenerateTask, CATALOG_FILENAME, MANIFEST_FILE_NAME 6 | 7 | from opendbt.catalog import OpenDbtCatalog 8 | from opendbt.runtime_patcher import PatchClass 9 | 10 | 11 | @PatchClass(module_name="dbt.task.generate", target_name="GenerateTask") 12 | class OpenDbtGenerateTask(GenerateTask): 13 | 14 | def deploy_user_index_html(self): 15 | # run custom code 16 | target = Path(self.config.project_target_path).joinpath("index.html") 17 | for dir in self.config.docs_paths: 18 | index_html = Path(self.config.project_root).joinpath(dir).joinpath("index.html") 19 | if index_html.is_file() and index_html.exists(): 20 | # override default dbt provided index.html with user index.html file 21 | shutil.copyfile(index_html, target) 22 | click.echo(f"Using user provided documentation page: {index_html.as_posix()}") 23 | return 24 | 25 | def generate_opendbt_catalogl_json(self): 26 | catalog_path = Path(self.config.project_target_path).joinpath(CATALOG_FILENAME) 27 | manifest_path = Path(self.config.project_target_path).joinpath(MANIFEST_FILE_NAME) 28 | catalog = OpenDbtCatalog(manifest_path=manifest_path, catalog_path=catalog_path) 29 | catalog.export() 30 | 31 | def run(self): 32 | # Call the original dbt run method 33 | result = super().run() 34 | self.deploy_user_index_html() 35 | self.generate_opendbt_catalogl_json() 36 | return result 37 | -------------------------------------------------------------------------------- /opendbt/dbt/v17/task/run.py: -------------------------------------------------------------------------------- 1 | from dbt.contracts.results import NodeStatus 2 | from dbt.events.base_types import EventLevel 3 | from dbt.events.functions import fire_event 4 | from dbt.events.types import ( 5 | LogModelResult, 6 | ) 7 | from dbt.task import run 8 | 9 | from 
opendbt.runtime_patcher import PatchClass 10 | 11 | 12 | @PatchClass(module_name="dbt.task.run", target_name="ModelRunner") 13 | class OpenDbtModelRunner(run.ModelRunner): 14 | 15 | def print_result_adapter_response(self, result): 16 | if hasattr(result, 'adapter_response') and result.adapter_response: 17 | if result.status == NodeStatus.Error: 18 | status = result.status 19 | level = EventLevel.ERROR 20 | else: 21 | status = result.message 22 | level = EventLevel.INFO 23 | fire_event( 24 | LogModelResult( 25 | description=str(result.adapter_response), 26 | status=status, 27 | index=self.node_index, 28 | total=self.num_nodes, 29 | execution_time=result.execution_time, 30 | node_info=self.node.node_info, 31 | ), 32 | level=level, 33 | ) 34 | 35 | def print_result_line(self, result): 36 | super().print_result_line(result) 37 | self.print_result_adapter_response(result=result) 38 | -------------------------------------------------------------------------------- /opendbt/dbt/v18/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/adapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/adapters/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/adapters/factory.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from multiprocessing.context import SpawnContext 3 | from typing import Optional 4 | 5 | from dbt.adapters import factory 6 | from dbt.adapters.contracts.connection import AdapterRequiredConfig 7 | from dbt.adapters.events.types import ( 8 | AdapterRegistered, 9 | ) 10 | from dbt.adapters.factory import Adapter 11 | from dbt_common.events.base_types import EventLevel 12 | from dbt_common.events.functions import fire_event 13 | 14 | from opendbt.runtime_patcher import PatchClass 15 | 16 | 17 | @PatchClass(module_name="dbt.adapters.factory", target_name="AdapterContainer") 18 | class OpenDbtAdapterContainer(factory.AdapterContainer): 19 | DBT_CUSTOM_ADAPTER_VAR = 'dbt_custom_adapter' 20 | 21 | def register_adapter( 22 | self, 23 | config: 'AdapterRequiredConfig', 24 | mp_context: SpawnContext, 25 | adapter_registered_log_level: Optional[EventLevel] = EventLevel.INFO, 26 | ) -> None: 27 | adapter_name = config.credentials.type 28 | adapter_type = self.get_adapter_class_by_name(adapter_name) 29 | adapter_version = self._adapter_version(adapter_name) 30 | # ==== CUSTOM CODE ==== 31 | custom_adapter_class_name: str = self.get_custom_adapter_config_value(config) 32 | if custom_adapter_class_name and custom_adapter_class_name.strip(): 33 | # OVERRIDE DEFAULT ADAPTER BY USER GIVEN ADAPTER CLASS 34 | adapter_type = self.get_custom_adapter_class_by_name(custom_adapter_class_name) 35 | # ==== END CUSTOM CODE ==== 36 | fire_event( 37 | AdapterRegistered(adapter_name=adapter_name, adapter_version=adapter_version), 38 | level=adapter_registered_log_level, 39 | ) 40 | with self.lock: 41 | if adapter_name in self.adapters: 42 | # this shouldn't really happen... 
43 | return 44 | 45 | adapter: Adapter = adapter_type(config, mp_context) # type: ignore 46 | self.adapters[adapter_name] = adapter 47 | 48 | def get_custom_adapter_config_value(self, config: 'AdapterRequiredConfig') -> Optional[str]: 49 | # FIRST: it's set as cli value: dbt run --vars {'dbt_custom_adapter': 'custom_adapters.DuckDBAdapterV1Custom'} 50 | if hasattr(config, 'cli_vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.cli_vars: 51 | custom_adapter_class_name: str = config.cli_vars[self.DBT_CUSTOM_ADAPTER_VAR] 52 | if custom_adapter_class_name and custom_adapter_class_name.strip(): 53 | return custom_adapter_class_name 54 | # SECOND: it's set inside dbt_project.yml 55 | if hasattr(config, 'vars') and self.DBT_CUSTOM_ADAPTER_VAR in config.vars.to_dict(): 56 | custom_adapter_class_name: str = config.vars.to_dict()[self.DBT_CUSTOM_ADAPTER_VAR] 57 | if custom_adapter_class_name and custom_adapter_class_name.strip(): 58 | return custom_adapter_class_name 59 | 60 | return None 61 | 62 | def get_custom_adapter_class_by_name(self, custom_adapter_class_name: str): 63 | if "." not in custom_adapter_class_name: 64 | raise ValueError(f"Unexpected adapter class name: `{custom_adapter_class_name}`, " 65 | f"expecting something like: `my.sample.library.MyAdapterClass`") 66 | 67 | __module, __class = custom_adapter_class_name.rsplit('.', 1) 68 | try: 69 | user_adapter_module = importlib.import_module(__module) 70 | user_adapter_class = getattr(user_adapter_module, __class) 71 | return user_adapter_class 72 | except ModuleNotFoundError as mnfe: 73 | raise Exception(f"Module of the provided adapter not found: {custom_adapter_class_name}") from mnfe 74 | --------------------------------------------------------------------------------
/opendbt/dbt/v18/artifacts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/artifacts/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/artifacts/schemas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/artifacts/schemas/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/artifacts/schemas/run.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from dbt.artifacts.schemas import run 5 | from filelock import FileLock, Timeout 6 | 7 | from opendbt.runtime_patcher import PatchClass 8 | from opendbt.utils import Utils 9 | 10 | 11 | # pylint: disable=too-many-ancestors 12 | @PatchClass(module_name="dbt.artifacts.schemas.run", target_name="RunResultsArtifact") 13 | @PatchClass(module_name="dbt.artifacts.schemas.run.v5.run", target_name="RunResultsArtifact") 14 | class OpenDbtRunResultsArtifact(run.RunResultsArtifact): 15 | 16 | def run_info(self) -> dict: 17 | run_info_data: dict = self.to_dict(omit_none=False) 18 | nodes = {} 19 | for r in self.results: 20 | key = r.unique_id 21 | execute_entry = next((item for item in r.timing if item.name == "execute"), None) 22 | run_completed_at = execute_entry.completed_at if execute_entry else None 23 | nodes[key] = {} 24 | nodes[key]['run_status'] = r.status 25 | nodes[key]['run_completed_at'] = run_completed_at.strftime("%Y-%m-%d %H:%M:%S") if run_completed_at else None 26
| nodes[key]['run_message'] = r.message 27 | nodes[key]['run_failures'] = r.failures 28 | nodes[key]['run_adapter_response'] = r.adapter_response 29 | 30 | # pylint: disable=unsupported-assignment-operation 31 | run_info_data['nodes'] = nodes 32 | run_info_data.pop('results', None) 33 | return run_info_data 34 | 35 | def write_run_info(self, path: str): 36 | run_info_file = Path(path).parent.joinpath("run_info.json") 37 | command = self.args.get('which', "NONE") 38 | if command not in ['run', 'build', 'test']: 39 | return 40 | 41 | lock_file = run_info_file.with_suffix(".json.lock") # Use a distinct lock file extension 42 | data = {} 43 | try: 44 | # Acquire the lock (wait up to 10 seconds) 45 | lock = FileLock(lock_file, timeout=10) 46 | with lock: 47 | if run_info_file.exists() and run_info_file.stat().st_size > 0: 48 | try: 49 | with open(run_info_file, 'r', encoding='utf-8') as f: 50 | data = json.load(f) 51 | # Ensure it's a dictionary before merging 52 | if not isinstance(data, dict): 53 | print(f"Warning: Content of {run_info_file} is not a JSON object. Overwriting.") 54 | data = {} 55 | except json.JSONDecodeError: 56 | print(f"Warning: Could not decode JSON from {run_info_file}. Overwriting.") 57 | except Exception as e: 58 | print(f"Error reading {run_info_file}: {e}. Starting fresh.") 59 | 60 | new_data = self.run_info() 61 | data = Utils.merge_dicts(data, new_data) 62 | 63 | try: 64 | with open(run_info_file, 'w', encoding='utf-8') as f: 65 | json.dump(data, f) 66 | except Exception as e: 67 | print(f"Error writing merged data to {run_info_file}: {e}") 68 | 69 | except Timeout: 70 | print( 71 | f"Error: Could not acquire lock on {lock_file} within 10 seconds. Skipping update for {run_info_file}.") 72 | except Exception as e: 73 | # Catch other potential errors during locking or file operations 74 | print(f"An unexpected error occurred processing {run_info_file}: {e}") 75 | 76 | def write(self, path: str): 77 | super().write(path) 78 | self.write_run_info(path=path) 79 | --------------------------------------------------------------------------------
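`write_run_info` above persists a lock-protected, merged `run_info.json` next to `run_results.json` for `run`, `build` and `test` invocations. A small sketch of consuming that artifact; the `target/` path is dbt's default and an assumption here:

import json
from pathlib import Path

run_info = json.loads(Path("target/run_info.json").read_text(encoding="utf-8"))
for node_id, node in run_info.get("nodes", {}).items():
    # each node carries: run_status, run_completed_at, run_message,
    # run_failures and run_adapter_response (see run_info() above)
    print(node_id, node["run_status"], node.get("run_completed_at"))

--------------------------------------------------------------------------------
/opendbt/dbt/v18/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/config/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/config/runtime.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Mapping 4 | 5 | from dbt.config import RuntimeConfig 6 | from dbt.config.project import load_yml_dict 7 | from dbt.constants import DEPENDENCIES_FILE_NAME 8 | from dbt.exceptions import DbtProjectError, NonUniquePackageNameError 9 | from typing_extensions import override 10 | 11 | from opendbt.runtime_patcher import PatchClass 12 | 13 | # pylint: disable=too-many-ancestors 14 | @dataclass 15 | @PatchClass(module_name="dbt.config", target_name="RuntimeConfig") 16 | @PatchClass(module_name="dbt.cli.requires", target_name="RuntimeConfig") 17 | class OpenDbtRuntimeConfig(RuntimeConfig): 18 | def load_dependence_projects(self): 19 | dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}") 20 | 21 | if "projects" not in dependencies_yml_dict: 22 | return 23 | 24 | projects = dependencies_yml_dict["projects"] 25 | project_root_parent = Path(self.project_root).parent 26 | for project in 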
projects: 27 | path = project_root_parent.joinpath(project['name']) 28 | try: 29 | project = self.new_project(str(path.as_posix())) 30 | except DbtProjectError as e: 31 | raise DbtProjectError( 32 | f"Failed to read dependent project: {e} \n project path:{path.as_posix()}", 33 | result_type="invalid_project", 34 | path=path, 35 | ) from e 36 | 37 | yield project.project_name, project 38 | 39 | @override 40 | def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]: 41 | 42 | 43 | if self.dependencies is None: 44 | # this sets self.dependencies variable! 45 | self.dependencies = super().load_dependencies(base_only=base_only) 46 | 47 | # additionally load `projects` defined in `dependencies.yml` 48 | for project_name, project in self.load_dependence_projects(): 49 | if project_name in self.dependencies: 50 | raise NonUniquePackageNameError(project_name) 51 | self.dependencies[project_name] = project 52 | 53 | return self.dependencies 54 | --------------------------------------------------------------------------------
/opendbt/dbt/v18/task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/task/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/task/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/opendbt/812f2143c73f974d2f6e1932d9e29481071679ea/opendbt/dbt/v18/task/docs/__init__.py -------------------------------------------------------------------------------- /opendbt/dbt/v18/task/docs/generate.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import click 5 | from dbt.task.docs.generate import GenerateTask, CATALOG_FILENAME, MANIFEST_FILE_NAME 6 | 7 | from opendbt.catalog import OpenDbtCatalog 8 | from opendbt.runtime_patcher import PatchClass 9 | 10 | 11 | @PatchClass(module_name="dbt.task.docs.generate", target_name="GenerateTask") 12 | class OpenDbtGenerateTask(GenerateTask): 13 | 14 | def deploy_user_index_html(self): 15 | # run custom code 16 | target = Path(self.config.project_target_path).joinpath("index.html") 17 | for dir in self.config.docs_paths: 18 | index_html = Path(self.config.project_root).joinpath(dir).joinpath("index.html") 19 | if index_html.is_file() and index_html.exists(): 20 | # override default dbt provided index.html with user index.html file 21 | shutil.copyfile(index_html, target) 22 | click.echo(f"Using user provided documentation page: {index_html.as_posix()}") 23 | return 24 | 25 | def generate_opendbt_catalogl_json(self): 26 | catalog_path = Path(self.config.project_target_path).joinpath(CATALOG_FILENAME) 27 | manifest_path = Path(self.config.project_target_path).joinpath(MANIFEST_FILE_NAME) 28 | catalog = OpenDbtCatalog(manifest_path=manifest_path, catalog_path=catalog_path) 29 | catalog.export() 30 | 31 | def run(self): 32 | # Call the original dbt run method 33 | result = super().run() 34 | self.deploy_user_index_html() 35 | self.generate_opendbt_catalogl_json() 36 | return result 37 | --------------------------------------------------------------------------------
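A minimal sketch of driving the patched docs task through opendbt's Python API; `OpenDbtCli` usage mirrors tests/base_dbt_test.py, and the project path is illustrative. Besides the standard artifacts, the run deploys any user-provided index.html found in the configured docs-paths and exports the extended opendbt catalog:

from opendbt import OpenDbtCli

client = OpenDbtCli(project_dir="path/to/dbt_project", profiles_dir="path/to/dbt_project")
# runs the patched GenerateTask above: builds catalog.json, applies the
# user index.html override, then exports the enriched opendbt catalog
client.invoke(args=["docs", "generate"])

--------------------------------------------------------------------------------
/opendbt/dbt/v18/task/run.py: -------------------------------------------------------------------------------- 1 | from dbt.artifacts.schemas.results import 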
NodeStatus 2 | from dbt.events.types import ( 3 | LogModelResult, 4 | ) 5 | from dbt.task import run 6 | from dbt_common.events.base_types import EventLevel 7 | from dbt_common.events.functions import fire_event 8 | 9 | from opendbt.runtime_patcher import PatchClass 10 | 11 | 12 | @PatchClass(module_name="dbt.task.run", target_name="ModelRunner") 13 | class OpenDbtModelRunner(run.ModelRunner): 14 | 15 | def print_result_adapter_response(self, result): 16 | if hasattr(result, 'adapter_response') and result.adapter_response: 17 | if result.status == NodeStatus.Error: 18 | status = result.status 19 | level = EventLevel.ERROR 20 | else: 21 | status = result.message 22 | level = EventLevel.INFO 23 | fire_event( 24 | LogModelResult( 25 | description=str(result.adapter_response), 26 | status=status, 27 | index=self.node_index, 28 | total=self.num_nodes, 29 | execution_time=result.execution_time, 30 | node_info=self.node.node_info, 31 | ), 32 | level=level, 33 | ) 34 | 35 | def print_result_line(self, result): 36 | super().print_result_line(result) 37 | self.print_result_adapter_response(result=result) 38 | -------------------------------------------------------------------------------- /opendbt/examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from multiprocessing.context import SpawnContext 3 | 4 | from dbt.adapters.duckdb import DuckDBAdapter 5 | 6 | 7 | class DuckDBAdapterV2Custom(DuckDBAdapter): 8 | pass 9 | 10 | # NOTE! used for testing 11 | class DuckDBAdapterTestingOnlyDbt17(DuckDBAdapter): 12 | def __init__(self, config) -> None: 13 | print(f"WARNING: Using User Provided DBT Adapter: {type(self).__module__}.{type(self).__name__}") 14 | # pylint: disable=no-value-for-parameter 15 | super().__init__(config=config) 16 | raise Exception("Custom user defined test adapter activated, test exception") 17 | 18 | 19 | # NOTE! used for testing 20 | class DuckDBAdapterTestingOnlyDbt18(DuckDBAdapter): 21 | def __init__(self, config, mp_context: SpawnContext) -> None: 22 | print(f"WARNING: Using User Provided DBT Adapter: {type(self).__module__}.{type(self).__name__}") 23 | super().__init__(config=config, mp_context=mp_context) 24 | raise Exception("Custom user defined test adapter activated, test exception") 25 | 26 | 27 | def email_dbt_test_callback(event: "EventMsg"): 28 | if event.info.name == "LogTestResult" and event.info.level in ["warn", "error"]: 29 | logging.getLogger('dbtcallbacks').warning("DBT callback `email_dbt_test_callback` called!") 30 | email_subject = f"[DBT] test {event.info.level} raised" 31 | email_html_content = f"""Following test raised {event.info.level}! 
32 | dbt msg: {event.info.msg} 33 | dbt test: {event.data.name} 34 | dbt node_relation: {event.data.node_info.node_relation} 35 | --------------- full data --------------- 36 | dbt data: {event.data} 37 | """ 38 | # @TODO send email alert using airflow 39 | # from airflow.utils.email import send_email 40 | # send_email( 41 | # subject=email_subject, 42 | # to="my-slack-notification-channel@slack.com", 43 | # html_content=email_html_content 44 | # ) 45 | logging.getLogger('dbtcallbacks').error("Callback email sent!") 46 | -------------------------------------------------------------------------------- /opendbt/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | class OpenDbtLogger: 6 | _log = None 7 | 8 | @property 9 | def log(self) -> logging.Logger: 10 | if self._log is None: 11 | self._log = logging.getLogger(name="opendbt") 12 | if not self._log.hasHandlers(): 13 | handler = logging.StreamHandler(sys.stdout) 14 | formatter = logging.Formatter("[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s") 15 | handler.setFormatter(formatter) 16 | handler.setLevel(logging.INFO) 17 | self._log.addHandler(handler) 18 | return self._log -------------------------------------------------------------------------------- /opendbt/macros/executedlt.sql: -------------------------------------------------------------------------------- 1 | {% materialization executedlt, supported_languages=['python']%} 2 | 3 | {%- set identifier = model['alias'] -%} 4 | {%- set language = model['language'] -%} 5 | 6 | {% set grant_config = config.get('grants') %} 7 | 8 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 9 | {%- set target_relation = api.Relation.create(identifier=identifier, 10 | schema=schema, 11 | database=database, type='table') -%} 12 | {{ run_hooks(pre_hooks) }} 13 | 14 | {% call noop_statement(name='main', message='Executed DLT pipeline', code=compiled_code, rows_affected=-1, res=None) %} 15 | {%- set res = adapter.submit_local_dlt_job(model, compiled_code) -%} 16 | {% endcall %} 17 | {{ run_hooks(post_hooks) }} 18 | 19 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %} 20 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 21 | 22 | {% do persist_docs(target_relation, model) %} 23 | 24 | {{ return({'relations': [target_relation]}) }} 25 | 26 | {% endmaterialization %} 27 | -------------------------------------------------------------------------------- /opendbt/macros/executepython.sql: -------------------------------------------------------------------------------- 1 | {% materialization executepython, supported_languages=['python']%} 2 | 3 | {%- set identifier = model['alias'] -%} 4 | {%- set language = model['language'] -%} 5 | 6 | {% set grant_config = config.get('grants') %} 7 | 8 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 9 | {%- set target_relation = api.Relation.create(identifier=identifier, 10 | schema=schema, 11 | database=database, type='table') -%} 12 | {{ run_hooks(pre_hooks) }} 13 | 14 | {% call noop_statement(name='main', message='Executed Python', code=compiled_code, rows_affected=-1, res=None) %} 15 | {%- set res = adapter.submit_local_python_job(model, compiled_code) -%} 16 | {% endcall %} 17 | {{ run_hooks(post_hooks) }} 18 | 19 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) 
%} 20 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 21 | 22 | {% do persist_docs(target_relation, model) %} 23 | 24 | {{ return({'relations': [target_relation]}) }} 25 | 26 | {% endmaterialization %} 27 | --------------------------------------------------------------------------------
/opendbt/macros/executesql.sql: -------------------------------------------------------------------------------- 1 | {% materialization executesql, supported_languages=['sql']%} 2 | 3 | {# 4 | A modified version of the table materialization; it executes the compiled SQL statement as-is. 5 | #} 6 | 7 | {%- set identifier = model['alias'] -%} 8 | {%- set language = model['language'] -%} 9 | 10 | {% set grant_config = config.get('grants') %} 11 | 12 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 13 | {%- set target_relation = api.Relation.create(identifier=identifier, 14 | schema=schema, 15 | database=database, type='table') -%} 16 | 17 | {{ run_hooks(pre_hooks) }} 18 | 19 | {{ log(msg="Executing SQL: " ~ compiled_code ~ "", info=True) }} 20 | {% call statement('main', language=language, fetch_result=False) -%} 21 | {{ compiled_code }} 22 | {%- endcall %} 23 | 24 | {%- set result = load_result('main') -%} 25 | {{ log(msg="Execution result " ~ result ~ "", info=True) }} 26 | {# DISABLED 27 | {%- set result_data = result['data'] -%} 28 | {{ log(msg="Execution result_data " ~ result_data ~ "", info=True) }} 29 | {%- set result_status = result['response'] -%} 30 | {{ log(msg="Execution result_status " ~ result_status ~ "", info=True) }} 31 | END-DISABLED #} 32 | 33 | {{ run_hooks(post_hooks) }} 34 | 35 | {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %} 36 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 37 | 38 | {% do persist_docs(target_relation, model) %} 39 | 40 | {{ return({'relations': [target_relation]}) }} 41 | 42 | {% endmaterialization %} 43 | --------------------------------------------------------------------------------
/opendbt/runtime_patcher.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from typing import Callable, Type, Any 3 | from opendbt.logger import OpenDbtLogger 4 | 5 | 6 | class RuntimePatcher(OpenDbtLogger): 7 | """ 8 | A utility class for patching modules and classes at runtime. 9 | 10 | This class provides a simplified way to replace existing functions, 11 | classes, or attributes within modules with custom implementations. 12 | """ 13 | 14 | def __init__(self, module_name: str): 15 | """ 16 | Initializes the RuntimePatcher for a specific module. 17 | 18 | Args: 19 | module_name: The name of the module to patch (e.g., "dbt.config"). 20 | """ 21 | self.module_name = module_name 22 | self.module = importlib.import_module(module_name) 23 | 24 | def patch_function(self, function_name: str, new_function: Callable): 25 | """ 26 | Patches a function within the module. 27 | 28 | Args: 29 | function_name: The name of the function to patch. 30 | new_function: The new function to use as a replacement. 31 | """ 32 | setattr(self.module, function_name, new_function) 33 | self.log.debug(f"Patched function: {self.module_name}.{function_name}") 34 | 35 | def patch_class(self, class_name: str, new_class: Type): 36 | """ 37 | Patches a class within the module. 38 | 39 | Args: 40 | class_name: The name of the class to patch. 41 | new_class: The new class to use as a replacement. 
42 | """ 43 | setattr(self.module, class_name, new_class) 44 | self.log.debug(f"Patched class: {self.module_name}.{class_name}") 45 | 46 | def patch_attribute(self, attribute_name: str, new_value: Any): 47 | """ 48 | Patches an attribute within the module. 49 | 50 | Args: 51 | attribute_name: The name of the attribute to patch. 52 | new_value: The new value to assign to the attribute. 53 | """ 54 | setattr(self.module, attribute_name, new_value) 55 | self.log.debug(f"Patched attribute: {self.module_name}.{attribute_name}") 56 | 57 | def patch_class_method(self, class_name: str, method_name: str, new_method: Callable): 58 | """ 59 | Patches a class method within the module. 60 | 61 | Args: 62 | class_name: The name of the class containing the method. 63 | method_name: The name of the method to patch. 64 | new_method: The new method to use as a replacement. 65 | """ 66 | target_class = getattr(self.module, class_name) 67 | setattr(target_class, method_name, new_method) 68 | self.log.debug(f"Patched class method: {self.module_name}.{class_name}.{method_name}") 69 | 70 | 71 | class _PatchDecorator: 72 | """ 73 | Base class for patch decorators 74 | """ 75 | 76 | def __init__(self, module_name: str, target_name: str): 77 | self.module_name = module_name 78 | self.target_name = target_name 79 | self.patcher = RuntimePatcher(self.module_name) 80 | 81 | 82 | class PatchClass(_PatchDecorator): 83 | """ 84 | A decorator for patching classes at runtime. 85 | """ 86 | 87 | def __call__(self, target: Type): 88 | self.patcher.patch_class(self.target_name, target) 89 | return target 90 | 91 | 92 | class PatchFunction(_PatchDecorator): 93 | """ 94 | A decorator for patching functions at runtime. 95 | """ 96 | 97 | def __call__(self, target: Callable): 98 | self.patcher.patch_function(self.target_name, target) 99 | return target 100 | 101 | 102 | class PatchAttribute(_PatchDecorator): 103 | """ 104 | A decorator for patching attributes at runtime. 105 | """ 106 | 107 | def __call__(self, target: Any): 108 | # if it is callable, call it to get the value 109 | if callable(target): 110 | target = target() 111 | self.patcher.patch_attribute(self.target_name, target) 112 | return target 113 | 114 | 115 | class PatchClassMethod(_PatchDecorator): 116 | """ 117 | A decorator for patching class methods at runtime. 
118 | """ 119 | 120 | def __init__(self, module_name: str, class_name: str, method_name: str): 121 | super().__init__(module_name, class_name) 122 | self.method_name = method_name 123 | 124 | def __call__(self, target: Callable): 125 | self.patcher.patch_class_method(self.target_name, self.method_name, target) 126 | return target 127 | 128 | # Example Usage: 129 | 130 | # Example to use PatchClass for override the ModelRunner class 131 | # @PatchClass(module_name="dbt.task.run", target_name="ModelRunner") 132 | # class CustomModelRunner: 133 | # def __init__(self, *args, **kwargs): 134 | # print("Custom ModelRunner initialized!") 135 | # 136 | # 137 | # # Example to use PatchClass for override the RuntimeConfig class 138 | # @PatchClass(module_name="dbt.config", target_name="RuntimeConfig") 139 | # class CustomRuntimeConfig: 140 | # def __init__(self, *args, **kwargs): 141 | # print("Custom RuntimeConfig initialized!") 142 | # 143 | # # Example to use PatchAttribute for override the FACTORY attribute 144 | # @PatchAttribute(module_name="dbt.adapters.factory", target_name="FACTORY") 145 | # def get_custom_open_dbt_adapter_container(): 146 | # class CustomOpenDbtAdapterContainer: 147 | # def __init__(self, *args, **kwargs): 148 | # print("Custom OpenDbtAdapterContainer initialized!") 149 | # return CustomOpenDbtAdapterContainer 150 | # 151 | # 152 | # # Example to use PatchFunction for override the sqlfluff_lint function 153 | # @PatchFunction(module_name="dbt.cli.main", target_name="sqlfluff_lint") 154 | # def custom_sqlfluff_lint(): 155 | # print("Custom sqlfluff_lint called!") 156 | 157 | # Example to patch class method 158 | # @PatchClassMethod(module_name="dbt.adapters.factory", class_name="AdapterContainer", method_name="get_adapter") 159 | # def custom_get_adapter(self, *args, **kwargs): 160 | # print("Custom get_adapter method called!") 161 | # return "Custom Adapter" 162 | -------------------------------------------------------------------------------- /opendbt/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import subprocess 3 | 4 | 5 | class Utils: 6 | 7 | @staticmethod 8 | def runcommand(command: list, shell=False): 9 | with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, 10 | universal_newlines=True, shell=shell) as p: 11 | for line in p.stdout: 12 | if line: 13 | print(line.strip()) 14 | 15 | if p.returncode != 0: 16 | raise subprocess.CalledProcessError(p.returncode, p.args) 17 | 18 | @staticmethod 19 | def import_module_attribute_by_name(module_name: str): 20 | if "." not in module_name: 21 | raise ValueError(f"Unexpected module name: `{module_name}` ," 22 | f"Expecting something like:`my.sample.library.MyClass` or `my.sample.library.my_method`") 23 | 24 | __module, __attribute = module_name.rsplit('.', 1) 25 | try: 26 | _adapter_module = importlib.import_module(__module) 27 | _adapter_attribute = getattr(_adapter_module, __attribute) 28 | return _adapter_attribute 29 | except ModuleNotFoundError as mnfe: 30 | raise Exception(f"Provided module not found, provided: {module_name}") from mnfe 31 | 32 | @staticmethod 33 | def merge_dicts(dict1: dict, dict2: dict) -> dict: 34 | """ 35 | Recursively merges dict2 into dict1, when both values exists dict1 value retained 36 | Returns: 37 | A new dictionary representing the merged result. 
38 | """ 39 | merged = dict1.copy() 40 | 41 | for key, value in dict2.items(): 42 | if key in merged: 43 | # Check if both values are dictionary-like (mappings) 44 | if isinstance(merged[key], dict) and isinstance(value, dict): 45 | # Both are dicts, recurse 46 | merged[key] = Utils.merge_dicts(merged[key], value) 47 | else: 48 | # Add dict2 value if dict2 value is not exists 49 | if not merged.get(key, None): 50 | merged[key] = value 51 | else: 52 | # Key not in dict1, simply add it 53 | merged[key] = value 54 | 55 | return merged 56 | 57 | @staticmethod 58 | def lowercase_dict_keys(input_dict: dict, recursive: bool = False): 59 | if not isinstance(input_dict, dict): 60 | return input_dict 61 | 62 | new_dict = {} 63 | for key, value in input_dict.items(): 64 | if isinstance(value, dict) and recursive: 65 | value = Utils.lowercase_dict_keys(value) 66 | if isinstance(key, str): 67 | key = key.lower() 68 | 69 | new_dict[key] = value 70 | 71 | return new_dict 72 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "opendbt" 7 | version = "0.14.0" 8 | authors = [ 9 | { name = "Memiiso Organization" }, 10 | ] 11 | description = "opendbt dbt-core with additional features" 12 | readme = "README.md" 13 | requires-python = ">=3.8" 14 | keywords = ["dbt-core", "dbt"] 15 | license = { text = "Apache License 2.0" } 16 | classifiers = [ 17 | "Development Status :: 5 - Production/Stable", 18 | "Programming Language :: Python :: 3", 19 | ] 20 | dependencies = [ 21 | "dbt-duckdb>=1.6", 22 | "sqlfluff", 23 | "sqlfluff-templater-dbt", 24 | "sqlglot", 25 | "tqdm", 26 | "filelock" 27 | ] 28 | [project.optional-dependencies] 29 | airflow = ["apache-airflow"] 30 | test = ["testcontainers>=4.10", "apache-airflow", "pylint", "dlt[duckdb]"] 31 | dev = ["mkdocs-material"] 32 | 33 | [tool.setuptools] 34 | include-package-data = true 35 | packages = ["opendbt"] 36 | 37 | [project.scripts] 38 | opendbt = "opendbt.__main__:main" 39 | 40 | [project.urls] 41 | Homepage = "https://github.com/memiiso/opendbt" 42 | Documentation = "https://github.com/memiiso/opendbt" 43 | Repository = "https://github.com/memiiso/opendbt" 44 | -------------------------------------------------------------------------------- /tests/base_dbt_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest import TestCase 3 | 4 | from dbt.version import __version__ as dbt_version_str 5 | from dbt.version import get_installed_version as get_dbt_version 6 | 7 | from opendbt import OpenDbtCli 8 | 9 | 10 | class BaseDbtTest(TestCase): 11 | TESTS_ROOT = Path(__file__).parent 12 | PROJECT_ROOT = TESTS_ROOT.parent 13 | RESOURCES_DIR = TESTS_ROOT.joinpath("resources") 14 | DBTCORE_DIR = RESOURCES_DIR.joinpath("dbtcore") 15 | DBTFINANCE_DIR = RESOURCES_DIR.joinpath("dbtfinance") 16 | DBT_VERSION = get_dbt_version() 17 | DBT_VERSION_STR = dbt_version_str 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | BaseDbtTest.PROJECT_ROOT.joinpath("dev.duckdb").unlink(missing_ok=True) 22 | BaseDbtTest.RESOURCES_DIR.joinpath("dev.duckdb").unlink(missing_ok=True) 23 | 24 | dpf = OpenDbtCli(project_dir=BaseDbtTest.DBTFINANCE_DIR, profiles_dir=BaseDbtTest.DBTFINANCE_DIR) 25 | dpc = OpenDbtCli(project_dir=BaseDbtTest.DBTCORE_DIR, 
profiles_dir=BaseDbtTest.DBTCORE_DIR) 26 | dpf.invoke(args=["clean"]) 27 | dpc.invoke(args=["clean"]) 28 | 29 | def setUp(self): 30 | # Setup actions to be performed before each test 31 | BaseDbtTest.PROJECT_ROOT.joinpath("dev.duckdb").unlink(missing_ok=True) 32 | BaseDbtTest.RESOURCES_DIR.joinpath("dev.duckdb").unlink(missing_ok=True) -------------------------------------------------------------------------------- /tests/resources/airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:slim-2.5.2-python3.8 2 | LABEL authors="opendbt" 3 | 4 | # install additional packages 5 | COPY --chown=airflow:airflow opendbt /tmp/opendbt/opendbt 6 | COPY --chown=airflow:airflow README.md /tmp/opendbt/README.md 7 | COPY --chown=airflow:airflow pyproject.toml /tmp/opendbt/pyproject.toml 8 | 9 | RUN pip install dbt-core==1.8.* 10 | RUN pip install dbt-duckdb==1.8.* 11 | RUN pip install opendbt 12 | EXPOSE 8080 13 | -------------------------------------------------------------------------------- /tests/resources/airflow/airflow/airflow.cfg: -------------------------------------------------------------------------------- 1 | # Default airflow config of the docker image 2 | [core] 3 | dags_folder = /opt/airflow/dags 4 | hostname_callable = airflow.utils.net.getfqdn 5 | default_timezone = utc 6 | executor = SequentialExecutor 7 | parallelism = 32 8 | max_active_tasks_per_dag = 16 9 | dags_are_paused_at_creation = True 10 | max_active_runs_per_dag = 16 11 | load_examples = True 12 | plugins_folder = /opt/airflow/plugins 13 | execute_tasks_new_python_interpreter = False 14 | fernet_key = 15 | donot_pickle = True 16 | dagbag_import_timeout = 30.0 17 | dagbag_import_error_tracebacks = True 18 | dagbag_import_error_traceback_depth = 2 19 | dag_file_processor_timeout = 50 20 | task_runner = StandardTaskRunner 21 | default_impersonation = 22 | security = 23 | unit_test_mode = False 24 | enable_xcom_pickling = False 25 | allowed_deserialization_classes = airflow\..* 26 | killed_task_cleanup_time = 60 27 | dag_run_conf_overrides_params = True 28 | dag_discovery_safe_mode = True 29 | dag_ignore_file_syntax = regexp 30 | default_task_retries = 0 31 | default_task_retry_delay = 300 32 | default_task_weight_rule = downstream 33 | default_task_execution_timeout = 34 | min_serialized_dag_update_interval = 30 35 | compress_serialized_dags = False 36 | min_serialized_dag_fetch_interval = 10 37 | max_num_rendered_ti_fields_per_task = 30 38 | check_slas = True 39 | xcom_backend = airflow.models.xcom.BaseXCom 40 | lazy_load_plugins = True 41 | lazy_discover_providers = True 42 | hide_sensitive_var_conn_fields = True 43 | sensitive_var_conn_names = 44 | default_pool_task_slot_count = 128 45 | max_map_length = 1024 46 | daemon_umask = 0o077 47 | [database] 48 | sql_alchemy_conn = sqlite:////opt/airflow/airflow.db 49 | sql_engine_encoding = utf-8 50 | sql_alchemy_pool_enabled = True 51 | sql_alchemy_pool_size = 5 52 | sql_alchemy_max_overflow = 10 53 | sql_alchemy_pool_recycle = 1800 54 | sql_alchemy_pool_pre_ping = True 55 | sql_alchemy_schema = 56 | load_default_connections = True 57 | max_db_retries = 3 58 | [logging] 59 | base_log_folder = /opt/airflow/logs 60 | remote_logging = False 61 | remote_log_conn_id = 62 | google_key_path = 63 | remote_base_log_folder = 64 | encrypt_s3_logs = False 65 | logging_level = INFO 66 | celery_logging_level = 67 | fab_logging_level = WARNING 68 | logging_config_class = 69 | colored_console_log = True 70 | colored_log_format = 
[%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s 71 | colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter 72 | log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s 73 | simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s 74 | dag_processor_log_target = file 75 | dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s 76 | log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware 77 | task_log_prefix_template = 78 | log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log 79 | log_processor_filename_template = {{ filename }}.log 80 | dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log 81 | task_log_reader = task 82 | extra_logger_names = 83 | worker_log_server_port = 8793 84 | [metrics] 85 | statsd_on = False 86 | statsd_host = localhost 87 | statsd_port = 8125 88 | statsd_prefix = airflow 89 | statsd_allow_list = 90 | stat_name_handler = 91 | statsd_datadog_enabled = False 92 | statsd_datadog_tags = 93 | 94 | [secrets] 95 | # backend = airflow.providers.hashicorp.secrets.vault.VaultBackend 96 | # backend_kwargs = {"connections_path": "connections", "variables_path": "variables", "mount_point": "airflow", "url": "http://127.0.0.1:8200"} 97 | backend = 98 | backend_kwargs = 99 | 100 | [cli] 101 | api_client = airflow.api.client.local_client 102 | endpoint_url = http://localhost:8080 103 | [debug] 104 | fail_fast = False 105 | [api] 106 | enable_experimental_api = False 107 | auth_backends = airflow.api.auth.backend.session 108 | maximum_page_limit = 100 109 | fallback_page_limit = 100 110 | google_oauth2_audience = 111 | google_key_path = 112 | access_control_allow_headers = 113 | access_control_allow_methods = 114 | access_control_allow_origins = 115 | [lineage] 116 | backend = 117 | [atlas] 118 | sasl_enabled = False 119 | host = 120 | port = 21000 121 | username = 122 | password = 123 | [operators] 124 | default_owner = airflow 125 | default_cpus = 1 126 | default_ram = 512 127 | default_disk = 512 128 | default_gpus = 0 129 | default_queue = default 130 | allow_illegal_arguments = False 131 | [hive] 132 | default_hive_mapred_queue = 133 | [webserver] 134 | base_url = http://localhost:8080 135 | default_ui_timezone = UTC 136 | web_server_host = 0.0.0.0 137 | web_server_port = 8080 138 | web_server_ssl_cert = 139 | web_server_ssl_key = 140 | session_backend = database 141 | web_server_master_timeout = 120 142 | web_server_worker_timeout = 120 143 | worker_refresh_batch_size = 1 144 | worker_refresh_interval = 6000 145 | reload_on_plugin_change = False 146 | secret_key = KpWSnDmjuxdEAVePCn1T4Q== 147 | workers = 4 148 | worker_class = sync 149 | access_logfile = - 150 | error_logfile = - 151 | access_logformat = 152 | expose_config = False 153 | expose_hostname = False 154 | expose_stacktrace = False 155 | dag_default_view = grid 156 | dag_orientation = LR 157 | log_fetch_timeout_sec = 5 158 | log_fetch_delay_sec = 2 159 | log_auto_tailing_offset = 30 160 | log_animation_speed = 1000 161 | hide_paused_dags_by_default = False 162 | page_size = 100 163 | navbar_color = #fff 164 | default_dag_run_display_number = 25 165 | enable_proxy_fix = False 166 | 
proxy_fix_x_for = 1 167 | proxy_fix_x_proto = 1 168 | proxy_fix_x_host = 1 169 | proxy_fix_x_port = 1 170 | proxy_fix_x_prefix = 1 171 | cookie_secure = False 172 | cookie_samesite = Lax 173 | default_wrap = False 174 | x_frame_enabled = True 175 | show_recent_stats_for_completed_runs = True 176 | update_fab_perms = True 177 | session_lifetime_minutes = 43200 178 | instance_name_has_markup = False 179 | auto_refresh_interval = 3 180 | warn_deployment_exposure = True 181 | audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data 182 | [email] 183 | email_backend = airflow.utils.email.send_email_smtp 184 | email_conn_id = smtp_default 185 | default_email_on_retry = True 186 | default_email_on_failure = True 187 | [smtp] 188 | smtp_host = localhost 189 | smtp_starttls = True 190 | smtp_ssl = False 191 | smtp_port = 25 192 | smtp_mail_from = airflow@example.com 193 | smtp_timeout = 30 194 | smtp_retry_limit = 5 195 | [sentry] 196 | sentry_on = false 197 | sentry_dsn = 198 | [local_kubernetes_executor] 199 | kubernetes_queue = kubernetes 200 | [celery_kubernetes_executor] 201 | kubernetes_queue = kubernetes 202 | [celery] 203 | celery_app_name = airflow.executors.celery_executor 204 | worker_concurrency = 16 205 | worker_prefetch_multiplier = 1 206 | worker_enable_remote_control = true 207 | broker_url = redis://redis:6379/0 208 | flower_host = 0.0.0.0 209 | flower_url_prefix = 210 | flower_port = 5555 211 | flower_basic_auth = 212 | sync_parallelism = 0 213 | celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG 214 | ssl_active = False 215 | ssl_key = 216 | ssl_cert = 217 | ssl_cacert = 218 | pool = prefork 219 | operation_timeout = 1.0 220 | task_track_started = True 221 | task_adoption_timeout = 600 222 | stalled_task_timeout = 0 223 | task_publish_max_retries = 3 224 | worker_precheck = False 225 | [celery_broker_transport_options] 226 | [dask] 227 | cluster_address = 127.0.0.1:8786 228 | tls_ca = 229 | tls_cert = 230 | tls_key = 231 | [scheduler] 232 | job_heartbeat_sec = 5 233 | scheduler_heartbeat_sec = 5 234 | num_runs = -1 235 | scheduler_idle_sleep_time = 1 236 | min_file_process_interval = 30 237 | parsing_cleanup_interval = 60 238 | dag_dir_list_interval = 300 239 | print_stats_interval = 30 240 | pool_metrics_interval = 5.0 241 | scheduler_health_check_threshold = 30 242 | enable_health_check = False 243 | scheduler_health_check_server_port = 8974 244 | orphaned_tasks_check_interval = 300.0 245 | child_process_log_directory = /opt/airflow/logs/scheduler 246 | scheduler_zombie_task_threshold = 300 247 | zombie_detection_interval = 10.0 248 | catchup_by_default = True 249 | ignore_first_depends_on_past_by_default = True 250 | max_tis_per_query = 512 251 | use_row_level_locking = True 252 | max_dagruns_to_create_per_loop = 10 253 | max_dagruns_per_loop_to_schedule = 20 254 | schedule_after_task_execution = True 255 | parsing_processes = 2 256 | file_parsing_sort_mode = modified_time 257 | standalone_dag_processor = False 258 | max_callbacks_per_loop = 20 259 | dag_stale_not_seen_duration = 600 260 | use_job_schedule = True 261 | allow_trigger_in_future = False 262 | trigger_timeout_check_interval = 15 263 | [triggerer] 264 | default_capacity = 1000 265 | [kerberos] 266 | ccache = /tmp/airflow_krb5_ccache 267 | principal = airflow 268 | reinit_frequency = 3600 269 | kinit_path = kinit 270 | keytab = airflow.keytab 271 | forwardable = True 272 | include_ip = True 273 | [elasticsearch] 274 | host = 275 | 
log_id_template = {dag_id}-{task_id}-{run_id}-{map_index}-{try_number} 276 | end_of_log_mark = end_of_log 277 | frontend = 278 | write_stdout = False 279 | json_format = False 280 | json_fields = asctime, filename, lineno, levelname, message 281 | host_field = host 282 | offset_field = offset 283 | [elasticsearch_configs] 284 | use_ssl = False 285 | verify_certs = True 286 | [kubernetes_executor] 287 | pod_template_file = 288 | worker_container_repository = 289 | worker_container_tag = 290 | namespace = default 291 | delete_worker_pods = True 292 | delete_worker_pods_on_failure = False 293 | worker_pods_creation_batch_size = 1 294 | multi_namespace_mode = False 295 | in_cluster = True 296 | kube_client_request_args = 297 | delete_option_kwargs = 298 | enable_tcp_keepalive = True 299 | tcp_keep_idle = 120 300 | tcp_keep_intvl = 30 301 | tcp_keep_cnt = 6 302 | verify_ssl = True 303 | worker_pods_pending_timeout = 300 304 | worker_pods_pending_timeout_check_interval = 120 305 | worker_pods_queued_check_interval = 60 306 | worker_pods_pending_timeout_batch_size = 100 307 | [sensors] 308 | default_timeout = 604800 309 | -------------------------------------------------------------------------------- /tests/resources/airflow/airflow/webserver_config.py: -------------------------------------------------------------------------------- 1 | """Default configuration for the Airflow webserver.""" 2 | from __future__ import annotations 3 | 4 | import os 5 | 6 | from flask_appbuilder.const import AUTH_DB 7 | 8 | # from airflow.www.fab_security.manager import AUTH_LDAP 9 | # from airflow.www.fab_security.manager import AUTH_OAUTH 10 | # from airflow.www.fab_security.manager import AUTH_OID 11 | # from airflow.www.fab_security.manager import AUTH_REMOTE_USER 12 | 13 | 14 | basedir = os.path.abspath(os.path.dirname(__file__)) 15 | 16 | # Flask-WTF flag for CSRF 17 | WTF_CSRF_ENABLED = True 18 | WTF_CSRF_TIME_LIMIT = None 19 | 20 | # ---------------------------------------------------- 21 | # AUTHENTICATION CONFIG 22 | # ---------------------------------------------------- 23 | # For details on how to set up each of the following authentication, see 24 | # http://flask-appbuilder.readthedocs.io/en/latest/security.html# authentication-methods 25 | # for details. 
26 | 27 | # The authentication type 28 | # AUTH_OID : Is for OpenID 29 | # AUTH_DB : Is for database 30 | # AUTH_LDAP : Is for LDAP 31 | # AUTH_REMOTE_USER : Is for using REMOTE_USER from web server 32 | # AUTH_OAUTH : Is for OAuth 33 | AUTH_TYPE = AUTH_DB 34 | 35 | # Uncomment to setup Full admin role name 36 | # AUTH_ROLE_ADMIN = 'Admin' 37 | 38 | # Uncomment and set to desired role to enable access without authentication 39 | AUTH_ROLE_PUBLIC = 'Admin' 40 | 41 | # Will allow user self registration 42 | # AUTH_USER_REGISTRATION = True 43 | 44 | # The recaptcha it's automatically enabled for user self registration is active and the keys are necessary 45 | # RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY 46 | # RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY 47 | 48 | # Config for Flask-Mail necessary for user self registration 49 | # MAIL_SERVER = 'smtp.gmail.com' 50 | # MAIL_USE_TLS = True 51 | # MAIL_USERNAME = 'yourappemail@gmail.com' 52 | # MAIL_PASSWORD = 'passwordformail' 53 | # MAIL_DEFAULT_SENDER = 'sender@gmail.com' 54 | 55 | # The default user self registration role 56 | # AUTH_USER_REGISTRATION_ROLE = "Public" 57 | 58 | # When using OAuth Auth, uncomment to setup provider(s) info 59 | # Google OAuth example: 60 | # OAUTH_PROVIDERS = [{ 61 | # 'name':'google', 62 | # 'token_key':'access_token', 63 | # 'icon':'fa-google', 64 | # 'remote_app': { 65 | # 'api_base_url':'https://www.googleapis.com/oauth2/v2/', 66 | # 'client_kwargs':{ 67 | # 'scope': 'email profile' 68 | # }, 69 | # 'access_token_url':'https://accounts.google.com/o/oauth2/token', 70 | # 'authorize_url':'https://accounts.google.com/o/oauth2/auth', 71 | # 'request_token_url': None, 72 | # 'client_id': GOOGLE_KEY, 73 | # 'client_secret': GOOGLE_SECRET_KEY, 74 | # } 75 | # }] 76 | 77 | # When using LDAP Auth, setup the ldap server 78 | # AUTH_LDAP_SERVER = "ldap://ldapserver.new" 79 | 80 | # When using OpenID Auth, uncomment to setup OpenID providers. 81 | # example for OpenID authentication 82 | # OPENID_PROVIDERS = [ 83 | # { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' }, 84 | # { 'name': 'AOL', 'url': 'http://openid.aol.com/' }, 85 | # { 'name': 'Flickr', 'url': 'http://www.flickr.com/' }, 86 | # { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }] 87 | 88 | # ---------------------------------------------------- 89 | # Theme CONFIG 90 | # ---------------------------------------------------- 91 | # Flask App Builder comes up with a number of predefined themes 92 | # that you can use for Apache Airflow. 93 | # http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes 94 | # Please make sure to remove "navbar_color" configuration from airflow.cfg 95 | # in order to fully utilize the theme. 
(or use that property in conjunction with theme) 96 | # APP_THEME = "bootstrap-theme.css" # default bootstrap 97 | # APP_THEME = "amelia.css" 98 | # APP_THEME = "cerulean.css" 99 | # APP_THEME = "cosmo.css" 100 | # APP_THEME = "cyborg.css" 101 | # APP_THEME = "darkly.css" 102 | # APP_THEME = "flatly.css" 103 | # APP_THEME = "journal.css" 104 | # APP_THEME = "lumen.css" 105 | # APP_THEME = "paper.css" 106 | # APP_THEME = "readable.css" 107 | # APP_THEME = "sandstone.css" 108 | # APP_THEME = "simplex.css" 109 | # APP_THEME = "slate.css" 110 | # APP_THEME = "solar.css" 111 | # APP_THEME = "spacelab.css" 112 | # APP_THEME = "superhero.css" 113 | # APP_THEME = "united.css" 114 | # APP_THEME = "yeti.css" 115 | -------------------------------------------------------------------------------- /tests/resources/airflow/dags/dbt_mesh_workflow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from airflow import DAG 4 | from airflow.utils.dates import days_ago 5 | 6 | from opendbt.airflow import OpenDbtAirflowProject 7 | 8 | default_args = { 9 | 'owner': 'airflow', 10 | 'depends_on_past': False, 11 | 'email_on_failure': False, 12 | 'email_on_retry': False, 13 | 'retries': 1 14 | } 15 | 16 | with DAG( 17 | dag_id='dbt_mesh_workflow', 18 | default_args=default_args, 19 | description='DAG To run multiple dbt projects', 20 | schedule_interval=None, 21 | start_date=days_ago(3), 22 | catchup=False, 23 | max_active_runs=1 24 | ) as dag: 25 | DBT_PROJ_DIR = Path("/opt/dbtfinance") 26 | 27 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev') 28 | p.load_dbt_tasks(dag=dag, include_singular_tests=True, include_dbt_seeds=True) 29 | -------------------------------------------------------------------------------- /tests/resources/airflow/dags/dbt_tests_workflow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from airflow import DAG 4 | from airflow.operators.empty import EmptyOperator 5 | from airflow.utils.dates import days_ago 6 | 7 | from opendbt.airflow import OpenDbtAirflowProject 8 | 9 | default_args = { 10 | 'owner': 'airflow', 11 | 'depends_on_past': False, 12 | 'email_on_failure': False, 13 | 'email_on_retry': False, 14 | 'retries': 1 15 | } 16 | 17 | with DAG( 18 | dag_id='dbt_tests_workflow', 19 | default_args=default_args, 20 | description='DAG To run dbt tests', 21 | schedule_interval=None, 22 | start_date=days_ago(3), 23 | catchup=False, 24 | max_active_runs=1 25 | ) as dag: 26 | start = EmptyOperator(task_id="start") 27 | end = EmptyOperator(task_id="end") 28 | 29 | DBT_PROJ_DIR = Path("/opt/dbtcore") 30 | 31 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev') 32 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end, resource_type='test') 33 | -------------------------------------------------------------------------------- /tests/resources/airflow/dags/dbt_workflow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from airflow import DAG 4 | from airflow.operators.empty import EmptyOperator 5 | from airflow.utils.dates import days_ago 6 | 7 | from opendbt.airflow import OpenDbtAirflowProject 8 | 9 | default_args = { 10 | 'owner': 'airflow', 11 | 'depends_on_past': False, 12 | 'email_on_failure': False, 13 | 'email_on_retry': False, 14 | 'retries': 1 15 | } 16 | 17 | with DAG( 18 | 
dag_id='dbt_workflow', 19 | default_args=default_args, 20 | description='DAG To run dbt', 21 | schedule_interval=None, 22 | start_date=days_ago(3), 23 | catchup=False, 24 | max_active_runs=1 25 | ) as dag: 26 | start = EmptyOperator(task_id="start") 27 | end = EmptyOperator(task_id="end") 28 | 29 | DBT_PROJ_DIR = Path("/opt/dbtcore") 30 | 31 | p = OpenDbtAirflowProject(project_dir=DBT_PROJ_DIR, profiles_dir=DBT_PROJ_DIR, target='dev') 32 | p.load_dbt_tasks(dag=dag, start_node=start, end_node=end, include_singular_tests=True, include_dbt_seeds=True) 33 | -------------------------------------------------------------------------------- /tests/resources/airflow/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | airflow: 5 | build: 6 | dockerfile: tests/resources/airflow/Dockerfile 7 | # NOTE The path can be absolute or relative. 8 | # If it is relative, it is resolved from the Compose file's parent folder. 9 | context: ./../../../ 10 | image: opendbt_airflow 11 | entrypoint: airflow standalone 12 | volumes: 13 | # NOTE The path can be absolute or relative. 14 | - ./airflow/webserver_config.py:/opt/airflow/webserver_config.py 15 | - ./airflow/airflow.cfg:/opt/airflow/airflow.cfg 16 | - ./dags:/opt/airflow/dags:rw 17 | - ./plugins:/opt/airflow/plugins:rw 18 | - ./../dbtcore:/opt/dbtcore:rw 19 | - ./../dbtfinance:/opt/dbtfinance:rw 20 | - ./../../../opendbt/macros:/opt/dbtcore/macros:rw 21 | environment: 22 | - AIRFLOW__WEBSERVER__INSTANCE_NAME=LOCAL 23 | - AIRFLOW_ENVIRONMENT=LOCAL 24 | ports: 25 | - "8080" 26 | -------------------------------------------------------------------------------- /tests/resources/airflow/plugins/airflow_dbtdocs_page.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from opendbt.airflow import plugin 4 | 5 | # create public page on airflow server to serve DBT docs 6 | airflow_dbtdocs_page = plugin.init_plugins_dbtdocs_page(Path("/opt/dbtcore/target")) 7 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | templater = dbt 3 | dialect = duckdb 4 | # This change (from jinja to dbt templater) will make linting slower 5 | # because linting will first compile dbt code into data warehouse code. 6 | runaway_limit = 1000 7 | max_line_length = 180 8 | indent_unit = space 9 | 10 | [sqlfluff:indentation] 11 | tab_space_size = 4 12 | 13 | [sqlfluff:layout:type:comma] 14 | spacing_before = touch 15 | line_position = trailing 16 | 17 | # For rule specific configuration, use dots between the names exactly 18 | # as you would in .sqlfluff. In the background, SQLFluff will unpack the 19 | # configuration paths accordingly. 20 | [tool.sqlfluff.rules.capitalisation.keywords] 21 | capitalisation_policy = "upper" 22 | 23 | # The default configuration for capitalisation rules is "consistent" 24 | # which will auto-detect the setting from the rest of the file. This 25 | # is less desirable in a new project and you may find this (slightly 26 | # more strict) setting more useful. 
27 | # Typically we find users rely on syntax highlighting rather than 28 | # capitalisation to distinguish between keywords and identifiers. 29 | # Clearly, if your organisation has already settled on uppercase 30 | # formatting for any of these syntax elements then set them to "upper". 31 | # See https://stackoverflow.com/questions/608196/why-should-i-capitalize-my-sql-keywords-is-there-a-good-reason 32 | [sqlfluff:rules:capitalisation.keywords] 33 | capitalisation_policy = upper 34 | [sqlfluff:rules:capitalisation.identifiers] 35 | capitalisation_policy = upper 36 | [sqlfluff:rules:capitalisation.functions] 37 | extended_capitalisation_policy = upper 38 | # [sqlfluff:rules:capitalisation.literals] 39 | # capitalisation_policy = lower 40 | [sqlfluff:rules:capitalisation.types] 41 | extended_capitalisation_policy = upper 42 | 43 | [sqlfluff:rules:aliasing.table] 44 | aliasing = explicit 45 | 46 | [sqlfluff:rules:aliasing.column] 47 | aliasing = explicit 48 | 49 | [sqlfluff:rules:aliasing.expression] 50 | allow_scalar = False 51 | 52 | [sqlfluff:rules:ambiguous.column_references] # Number in group by 53 | group_by_and_order_by_style = implicit -------------------------------------------------------------------------------- /tests/resources/dbtcore/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbtcore' 2 | version: '1.0.0' 3 | 4 | profile: 'dbtcore' 5 | 6 | # include opendbt macros 7 | macro-paths: [ "macros", "../../../opendbt/macros/" ] 8 | # use opendbt index.html for docs 9 | docs-paths: [ "../../../opendbt/docs/" ] 10 | 11 | clean-targets: 12 | - "target" 13 | - "dbt_packages" 14 | - "logs" 15 | 16 | models: 17 | dbtcore: 18 | +materialized: table 19 | 20 | vars: 21 | dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom 22 | dbt_callbacks: opendbt.examples.email_dbt_test_callback -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_core_table1.sql: -------------------------------------------------------------------------------- 1 | with source_data as ( 2 | select 1 as id, 'row1' as row_data 3 | union all 4 | select 2 as id, 'row1' as row_data 5 | ) 6 | 7 | SELECT * 8 | FROM source_data -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_executedlt_model.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | from dlt.pipeline import TPipeline 3 | 4 | 5 | @dlt.resource( 6 | columns={"event_tstamp": {"data_type": "timestamp", "precision": 3}}, 7 | primary_key="event_id", 8 | ) 9 | def events(): 10 | yield [{"event_id": 1, "event_tstamp": "2024-07-30T10:00:00.123"}, 11 | {"event_id": 2, "event_tstamp": "2025-02-30T10:00:00.321"}] 12 | 13 | 14 | def model(dbt, pipeline: TPipeline): 15 | """ 16 | 17 | :param dbt: 18 | :param pipeline: Pre-configured dlt pipeline. dlt target connection and dataset is pre-set using the model config! 
19 | :return: None; the dlt pipeline run itself persists the data 20 | """ 21 | dbt.config(materialized="executedlt") 22 | print("========================================================") 23 | print(f"INFO: DLT Pipeline pipeline_name:{pipeline.pipeline_name}") 24 | print(f"INFO: DLT Pipeline dataset_name:{pipeline.dataset_name}") 25 | print(f"INFO: DLT Pipeline pipeline:{pipeline}") 26 | print(f"INFO: DLT Pipeline staging:{pipeline.staging}") 27 | print(f"INFO: DLT Pipeline destination:{pipeline.destination}") 28 | print(f"INFO: DLT Pipeline _pipeline_storage:{pipeline._pipeline_storage}") 29 | print(f"INFO: DLT Pipeline _schema_storage:{pipeline._schema_storage}") 30 | print(f"INFO: DLT Pipeline state:{pipeline.state}") 31 | print(f"INFO: DBT this:{dbt.this}") 32 | print("========================================================") 33 | load_info = pipeline.run(events(), table_name=str(dbt.this).split('.')[-1].strip('"'))  # table name: unquoted last part of dbt.this 34 | print(load_info) 35 | row_counts = pipeline.last_trace.last_normalize_info 36 | print(row_counts) 37 | print("========================================================") 38 | return None 39 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_executepython_dlt_model.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.resource( 5 | columns={"event_tstamp": {"data_type": "timestamp", "precision": 3}}, 6 | primary_key="event_id", 7 | ) 8 | def events(): 9 | yield [{"event_id": 1, "event_tstamp": "2024-07-30T10:00:00.123"}, 10 | {"event_id": 2, "event_tstamp": "2025-02-28T10:00:00.321"}] 11 | 12 | 13 | def model(dbt, session): 14 | dbt.config(materialized="executepython") 15 | print("========================================================") 16 | print(f"INFO: DLT Version:{dlt.version.__version__}") 17 | print(f"INFO: DBT Duckdb Session:{type(session)}") 18 | print(f"INFO: DBT Duckdb Connection:{type(session._env.conn)}") 19 | print("========================================================") 20 | p = dlt.pipeline( 21 | pipeline_name="dbt_dlt", 22 | destination=dlt.destinations.duckdb(session._env.conn), 23 | dataset_name=dbt.this.schema, 24 | dev_mode=False, 25 | ) 26 | load_info = p.run(events()) 27 | print(load_info) 28 | row_counts = p.last_trace.last_normalize_info 29 | print(row_counts) 30 | print("========================================================") 31 | return None 32 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_executepython_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | from dbt import version 5 | 6 | 7 | def print_info(): 8 | _str = f"name:{os.name}, system:{platform.system()} release:{platform.release()}" 9 | _str += f"\npython version:{platform.python_version()}, dbt:{version.__version__}" 10 | print(_str) 11 | 12 | 13 | def model(dbt, session): 14 | dbt.config(materialized="executepython") 15 | print("==================================================") 16 | print("========IM LOCALLY EXECUTED PYTHON MODEL==========") 17 | print("==================================================") 18 | print_info() 19 | print("==================================================") 20 | print("===============MAKE DBT GREAT AGAIN===============") 21 | print("==================================================") 22 | return None 23 | --------------------------------------------------------------------------------
/tests/resources/dbtcore/models/my_executesql_dbt_model.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='executesql') }} 2 | 3 | 4 | create or replace table my_execute_dbt_model 5 | as 6 | 7 | select 123 as column1 -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_failing_dbt_model.sql: -------------------------------------------------------------------------------- 1 | select non_exists_column as my_failing_column 2 | from {{ ref('my_first_dbt_model') }} 3 | where id = 1 4 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_first_dbt_model.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='table') }} 2 | 3 | with source_data as ( 4 | select 1 as id, 'test-value' as data_value, 'test-value' as column_3 5 | union all 6 | select 1 as id, 'test-value' as data_value, 'test-value' as column_3 7 | union all 8 | select 2 as id, 'test-value' as data_value, 'test-value' as column_3 9 | union all 10 | select null as id, 'test-value' as data_value, 'test-value' as column_3 11 | ) 12 | SELECT * 13 | FROM source_data 14 | -- where id is not null 15 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/my_second_dbt_model.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | t1.id AS pk_id, 3 | t1.data_value AS data_value1, 4 | CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2, 5 | t3.event_tstamp AS event_tstamp 6 | FROM {{ ref('my_first_dbt_model') }} AS t1 7 | LEFT JOIN {{ ref('my_core_table1') }} AS t2 ON t1.id = t2.id 8 | LEFT JOIN {{ ref('my_executedlt_model') }} AS t3 ON t1.id = t3.event_id 9 | WHERE t1.id IN (1, 2) 10 | -------------------------------------------------------------------------------- /tests/resources/dbtcore/models/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: my_first_dbt_model 6 | description: > 7 | # A starter dbt model 8 | 9 | this is a __sample__ model used as an example 10 | columns: 11 | - name: data_value 12 | - name: column_3 13 | - name: id 14 | description: "The **primary key** for this table" 15 | tests: 16 | - unique: 17 | config: 18 | severity: error 19 | error_if: ">1000" 20 | warn_if: ">0" 21 | - not_null: 22 | config: 23 | severity: error 24 | error_if: ">1000" 25 | warn_if: ">0" 26 | 27 | - name: my_second_dbt_model 28 | description: "A starter dbt model" 29 | columns: 30 | - name: pk_id 31 | description: "The primary key for this table" 32 | data_tests: 33 | - unique 34 | - not_null 35 | - name: data_value1 36 | - name: data_value2 37 | - name: event_tstamp 38 | - name: my_core_table1 39 | columns: 40 | - name: id 41 | - name: row_data 42 | - name: my_executedlt_model 43 | columns: 44 | - name: event_id 45 | - name: event_tstamp 46 | - name: my_executepython_model 47 | columns: 48 | - name: event_id 49 | - name: event_tstamp -------------------------------------------------------------------------------- /tests/resources/dbtcore/profiles.yml: -------------------------------------------------------------------------------- 1 | dbtcore: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | schema: core 6 | path: ./../dev.duckdb 7 | threads: 1 8 | 9 | target: dev 10 | 
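11 | # NOTE (hypothetical sketch, not used by the tests): more outputs can sit
12 | # alongside `dev` in this profile; an invented in-memory target for throwaway
13 | # runs could look like the commented block below, selected with `--target ci`.
14 | #  ci:
15 | #    type: duckdb
16 | #    schema: core
17 | #    path: ':memory:'
18 | #    threads: 1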
-------------------------------------------------------------------------------- /tests/resources/dbtfinance/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /tests/resources/dbtfinance/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbtfinance' 2 | version: '1.0.0' 3 | 4 | profile: 'dbtfinance' 5 | 6 | # directories to be removed by `dbt clean` 7 | clean-targets: 8 | - "target" 9 | - "dbt_packages" 10 | - "logs" 11 | 12 | models: 13 | # ensure referenced models resolve to the correct schema 14 | dbtcore: 15 | schema: "core" -------------------------------------------------------------------------------- /tests/resources/dbtfinance/dependencies.yml: -------------------------------------------------------------------------------- 1 | #packages: 2 | # - package: dbt-labs/dbt_utils 3 | # version: 1.1.1 4 | 5 | # case-sensitive and matches the 'name' in the 'dbt_project.yml' 6 | projects: 7 | - name: dbtcore -------------------------------------------------------------------------------- /tests/resources/dbtfinance/macros/generate_schema_name.sql: -------------------------------------------------------------------------------- 1 | {% macro generate_schema_name(custom_schema_name, node) -%} 2 | 3 | {%- set default_schema = target.schema -%} 4 | {%- if custom_schema_name is none -%} 5 | 6 | {{ default_schema }} 7 | 8 | {%- else -%} 9 | 10 | {# Here we override the default `generate_schema_name` macro, which 11 | normally concatenates the custom schema name with the default schema; 12 | instead, the custom schema name is used as-is. #} 13 | {{ custom_schema_name | trim }} 14 | 15 | {%- endif -%} 16 | 17 | {%- endmacro %} -------------------------------------------------------------------------------- /tests/resources/dbtfinance/models/my_cross_project_ref_model.sql: -------------------------------------------------------------------------------- 1 | 2 | select 3 | id, 4 | row_data, 5 | count(*) as num_rows 6 | from {{ ref('dbtcore', 'my_core_table1') }} 7 | -- fake second dependency {{ source('core', 'my_executepython_model') }} 8 | group by 1,2 -------------------------------------------------------------------------------- /tests/resources/dbtfinance/models/sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | # defining `dbtcore` project models as a source! 5 | - name: core 6 | schema: core 7 | tables: 8 | - name: my_executepython_model 9 | - name: my_executepython_dlt_model -------------------------------------------------------------------------------- /tests/resources/dbtfinance/profiles.yml: -------------------------------------------------------------------------------- 1 | dbtfinance: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | schema: finance 6 | path: ./../dev.duckdb 7 | threads: 1 8 | 9 | target: dev 10 | -------------------------------------------------------------------------------- /tests/test_airflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from pathlib import Path 4 | from time import sleep 5 | 6 | from testcontainers.compose import DockerCompose 7 | 8 | 9 | @unittest.skip("Manual test") 10 | class TestAirflowBase(unittest.TestCase): 11 | """ 12 | Test class for running Airflow integration tests. 13 | Uses the Airflow docker image and mounts the current code into it.
14 | Login is disabled: all users can access the UI as Admin, and the Airflow instance is set up as public. 15 | """ 16 | _compose: DockerCompose = None 17 | resources_dir = Path(__file__).parent.joinpath('resources') 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | os.chdir(cls.resources_dir.joinpath('airflow').as_posix()) 22 | cls._compose = DockerCompose(cls.resources_dir.joinpath('airflow').as_posix(), 23 | compose_file_name="docker-compose.yaml", 24 | # build=True, 25 | docker_command_path='podman' 26 | ) 27 | cls._compose.stop() 28 | cls._compose.start() 29 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/home") 30 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/dbtdocs") 31 | print(f"http://localhost:{cls._compose.get_service_port('airflow', 8080)}/dbtdocs/perf_info.json") 32 | 33 | @classmethod 34 | def tearDownClass(cls): 35 | print("Running tearDownClass") 36 | if cls._compose: 37 | cls._compose.stop() 38 | 39 | def __exit__(self, exc_type, exc_val, traceback): 40 | if self._compose: 41 | self._compose.stop() 42 | 43 | def test_start_airflow_local_and_wait(self): 44 | """ 45 | Deploys the code into the local Docker Airflow; UI login is disabled and made public! 46 | Useful for running local Airflow with new code changes and checking them in the Airflow UI; 47 | while it is running, code changes are reflected in Airflow after a short time. 48 | :return: 49 | """ 50 | sleep(99999999) 51 | -------------------------------------------------------------------------------- /tests/test_catalog.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from base_dbt_test import BaseDbtTest 4 | from opendbt import OpenDbtProject 5 | from opendbt.catalog import OpenDbtCatalog 6 | 7 | 8 | class TestOpenDbtCatalog(BaseDbtTest): 9 | 10 | def test_catalog_loading(self): 11 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 12 | dp.run(command="docs", args=['generate']) 13 | catalog = OpenDbtCatalog( 14 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'), 15 | catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json')) 16 | self.assertIn("model.dbtfinance.my_cross_project_ref_model", catalog.nodes.keys()) 17 | self.assertIn("model.dbtcore.my_core_table1", catalog.nodes.keys()) 18 | # print(extractor.nodes.get("model.dbtcore.my_core_table1").columns) 19 | model1 = catalog.nodes.get("model.dbtfinance.my_cross_project_ref_model") 20 | model1_schema = model1.db_schema_dict(include_parents=True) 21 | self.assertIn("dev", model1_schema) 22 | self.assertIn("finance", model1_schema["dev"]) 23 | self.assertIn("my_core_table1", model1_schema["dev"]["core"]) 24 | self.assertIn("my_cross_project_ref_model", model1_schema["dev"]["finance"]) 25 | # self.assertIn("row_data", model1_schema["dev"]["main"]['my_core_table1']) 26 | 27 | self.assertIn("num_rows", model1.populate_lineage(catalog.tables2nodes)) 28 | self.assertIn("row_data", model1.populate_lineage(catalog.tables2nodes)) 29 | 30 | @unittest.skip("Manual test") 31 | def test_catalog_export(self): 32 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 33 | dp.run(command="compile") 34 | dp.run(command="run", args=['--select', '+my_second_dbt_model']) 35 | dp.run(command="docs", args=['generate']) 36 | catalog = OpenDbtCatalog( 37 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'), 38 |
catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json')) 39 | catalog.export() 40 | 41 | def test_catalog_export_one_node(self): 42 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 43 | dp.run(command="compile") 44 | dp.run(command="run", args=['--select', '+my_second_dbt_model']) 45 | dp.run(command="docs", args=['generate']) 46 | catalog = OpenDbtCatalog( 47 | manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'), 48 | catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json')) 49 | node = catalog.node(node_id="model.dbtcore.my_second_dbt_model") 50 | result = node.parent_db_schema_dict() 51 | self.assertIn("my_first_dbt_model", result["dev"]["core"]) 52 | self.assertIn("column_3", result["dev"]["core"]["my_first_dbt_model"]) 53 | -------------------------------------------------------------------------------- /tests/test_custom_adapter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from packaging.version import Version 4 | 5 | from base_dbt_test import BaseDbtTest 6 | from opendbt import OpenDbtProject 7 | 8 | 9 | class TestOpenDbtProject(BaseDbtTest): 10 | 11 | def test_run_with_custom_adapter(self): 12 | if Version(self.DBT_VERSION.to_version_string(skip_matcher=True)) > Version("1.8.0"): 13 | dbt_custom_adapter = 'opendbt.examples.DuckDBAdapterTestingOnlyDbt18' 14 | else: 15 | dbt_custom_adapter = 'opendbt.examples.DuckDBAdapterTestingOnlyDbt17' 16 | 17 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 18 | args=['--vars', f"{{'dbt_custom_adapter': '{dbt_custom_adapter}'}}"]) 19 | with self.assertRaises(Exception) as context: 20 | sys.tracebacklimit = 0 21 | dp.run(command="compile") 22 | self.assertTrue("Custom user defined test adapter activated" in str(context.exception)) 23 | with self.assertRaises(Exception) as context:  # second run: the adapter must stay active across invocations 24 | sys.tracebacklimit = 0 25 | dp.run(command="compile") 26 | self.assertTrue("Custom user defined test adapter activated" in str(context.exception)) 27 | 28 | def test_run_with_custom_adapter_module_not_found(self): 29 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 30 | args=['--vars', '{dbt_custom_adapter: not.exits.module.MyDbtTestAdapterV1}'] 31 | ) 32 | with self.assertRaises(Exception) as context: 33 | sys.tracebacklimit = 0 34 | dp.run(command="compile") 35 | self.assertTrue("Module of provided adapter not found" in str(context.exception)) 36 | 37 | def test_run_with_custom_adapter_class_not_found(self): 38 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 39 | args=['--vars', '{dbt_custom_adapter: test_custom_adapter.NotExistsAdapterClass}'] 40 | ) 41 | with self.assertRaises(Exception) as context: 42 | sys.tracebacklimit = 0 43 | dp.run(command="compile") 44 | self.assertTrue("has no attribute 'NotExistsAdapterClass'" in str(context.exception)) 45 | 46 | def test_run_with_custom_adapter_wrong_name(self): 47 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 48 | args=['--vars', 'dbt_custom_adapter: test_custom_adapterMyDbtTestAdapterV1'] 49 | ) 50 | with self.assertRaises(Exception) as context: 51 | sys.tracebacklimit = 0 52 | dp.run(command="compile") 53 | self.assertTrue("Unexpected adapter class name" in str(context.exception)) 54 | -------------------------------------------------------------------------------- /tests/test_dbt_docs.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import semver 4 | 5 | from base_dbt_test import BaseDbtTest 6 | from opendbt import OpenDbtProject 7 | 8 | 9 | class TestDbtDocs(BaseDbtTest): 10 | 11 | def test_run_docs_generate(self): 12 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 13 | # run a build to generate the run_results.json and run_info.json files 14 | try: 15 | dp.run(command="build") 16 | except Exception:  # the project contains a deliberately failing model; docs generation is what is under test 17 | pass 18 | dp.run(command="docs", args=['generate']) 19 | self.assertTrue(self.DBTCORE_DIR.joinpath('target/catalogl.json').exists()) 20 | if semver.Version.parse(self.DBT_VERSION_STR) >= semver.Version.parse("1.8.0"): 21 | self.assertTrue(self.DBTCORE_DIR.joinpath('target/run_info.json').exists()) 22 | 23 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 24 | # dp.run(command="run") 25 | dp.run(command="docs", args=['generate']) 26 | index_html = self.DBTFINANCE_DIR.joinpath('target/index.html').read_text() 27 | # new html docs page 28 | self.assertTrue("tailwindcss" in index_html) 29 | self.assertTrue("vue.global.min.js" in index_html) 30 | self.assertTrue(self.DBTFINANCE_DIR.joinpath('target/catalogl.json').exists()) 31 | 32 | @unittest.skip("Manual test: `docs serve` starts a blocking local web server") 33 | def test_run_docs_serve(self): 34 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 35 | dp.run(command="docs", args=['generate']) 36 | dp.run(command="docs", args=['serve']) 37 | -------------------------------------------------------------------------------- /tests/test_dbt_sqlfluff.py: -------------------------------------------------------------------------------- 1 | from base_dbt_test import BaseDbtTest 2 | from opendbt import OpenDbtProject 3 | 4 | 5 | class TestDbtSqlFluff(BaseDbtTest): 6 | 7 | def test_run_sqlfluff_lint(self): 8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 9 | dp.run(command="sqlfluff", args=['fix']) 10 | dp.run(command="sqlfluff", args=['lint']) 11 | 12 | def test_run_sqlfluff_fix(self): 13 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 14 | dp.run(command="sqlfluff", args=['fix']) 15 | -------------------------------------------------------------------------------- /tests/test_executedlt_materialization.py: -------------------------------------------------------------------------------- 1 | from base_dbt_test import BaseDbtTest 2 | from opendbt import OpenDbtProject 3 | 4 | 5 | class TestOpenDbtProject(BaseDbtTest): 6 | 7 | def test_run_executedlt_materialization(self): 8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 9 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom']) 10 | dp.run(command="run", args=['--select', 'my_executedlt_model']) 11 | -------------------------------------------------------------------------------- /tests/test_executepython_materialization.py: -------------------------------------------------------------------------------- 1 | from base_dbt_test import BaseDbtTest 2 | from opendbt import OpenDbtProject 3 | 4 | 5 | class TestOpenDbtProject(BaseDbtTest): 6 | 7 | def test_run_executepython_materialization(self): 8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 9 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom']) 10 | dp.run(command="run", args=['--select', 'my_executepython_model']) 11 | 12 | 
def test_run_executepython_dlt_pipeline(self): 13 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 14 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom']) 15 | dp.run(command="run", args=['--select', 'my_executepython_dlt_model']) 16 | 17 | def test_run_executepython_materialization_subprocess(self): 18 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, 19 | args=['--vars', 'dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom']) 20 | dp.run(command="run", args=['--select', 'my_executepython_model'], use_subprocess=True) 21 | -------------------------------------------------------------------------------- /tests/test_executesql_materialization.py: -------------------------------------------------------------------------------- 1 | from base_dbt_test import BaseDbtTest 2 | from opendbt import OpenDbtProject 3 | 4 | 5 | class TestOpenDbtProject(BaseDbtTest): 6 | 7 | def test_run_executesql_materialization(self): 8 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 9 | dp.run(command="run", args=['--select', 'my_executesql_dbt_model']) 10 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pathlib import Path 4 | from unittest.mock import patch, Mock 5 | 6 | from base_dbt_test import BaseDbtTest 7 | from opendbt.__main__ import main 8 | 9 | 10 | class TestOpenDbtCliMain(BaseDbtTest): 11 | 12 | @patch("opendbt.__main__.OpenDbtCli") 13 | def test_main_with_project_dir_arg(self, mock_cli): 14 | test_project_dir = self.DBTFINANCE_DIR.resolve() 15 | test_profiles_dir = self.DBTFINANCE_DIR.resolve() 16 | sys.argv = ["main.py", "--project-dir", str(test_project_dir), "--profiles-dir", str(test_profiles_dir), "ls"] 17 | mock_instance = Mock(project_dir=test_project_dir, profiles_dir=test_profiles_dir) 18 | mock_cli.return_value = mock_instance 19 | main() 20 | mock_cli.assert_called_once_with(project_dir=test_project_dir, profiles_dir=test_profiles_dir) 21 | mock_instance.invoke.assert_called_once_with(args=['ls']) 22 | -------------------------------------------------------------------------------- /tests/test_opendbt_airflow.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.utils.dates import days_ago 3 | 4 | from base_dbt_test import BaseDbtTest 5 | from opendbt.airflow import OpenDbtAirflowProject 6 | 7 | 8 | class TestOpenDbtProject(BaseDbtTest): 9 | 10 | def get_dag(self): 11 | return DAG( 12 | dag_id='dbt_test_workflow', 13 | schedule_interval=None, 14 | start_date=days_ago(3), 15 | catchup=False, 16 | max_active_runs=1 17 | ) 18 | 19 | def test_run_dbt_as_airflow_task(self): 20 | with self.get_dag() as dag: 21 | # load dbt jobs to airflow dag 22 | p = OpenDbtAirflowProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR, target='dev') 23 | p.load_dbt_tasks(dag=dag, 24 | include_singular_tests=True, 25 | include_dbt_seeds=True) 26 | 27 | for j in dag.tasks: 28 | if 'my_first_dbt_model' in j.task_id: 29 | j.execute({}) 30 | if 'my_executedlt_model' in j.task_id: 31 | j.execute({}) 32 | if 'my_executepython_model' in j.task_id: 33 | j.execute({}) 34 | -------------------------------------------------------------------------------- /tests/test_opendbt_cli.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | 4 | import semver 5 | from dbt.exceptions import DbtRuntimeError 6 | 7 | from base_dbt_test import BaseDbtTest 8 | from opendbt import OpenDbtProject, OpenDbtCli 9 | from opendbt.examples import email_dbt_test_callback 10 | 11 | 12 | class TestOpenDbtCli(BaseDbtTest): 13 | 14 | def test_run_failed(self): 15 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 16 | with self.assertRaises(DbtRuntimeError) as context: 17 | dp.run(command="run", args=['--select', '+my_failing_dbt_model']) 18 | 19 | self.assertIn('Referenced column "non_exists_column" not found in FROM clause', str(context.exception.msg)) 20 | 21 | def test_cli_attributes(self): 22 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR) 23 | self.assertEqual(dp.project.project_name, "dbtcore") 24 | self.assertEqual(dp.project.profile_name, "dbtcore") 25 | self.assertIn('dbt_custom_adapter', dp.project_vars) 26 | self.assertIn('dbt_callbacks', dp.project_vars) 27 | self.assertEqual(dp.project_vars['dbt_custom_adapter'], 'opendbt.examples.DuckDBAdapterV2Custom') 28 | 29 | def test_cli_callbacks(self): 30 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR) 31 | self.assertIn(email_dbt_test_callback, dp.project_callbacks) 32 | 33 | with self.assertLogs('dbtcallbacks', level='INFO') as cm: 34 | try: 35 | dp.invoke(args=["test", '--select', 'my_core_table1 my_first_dbt_model', "--profiles-dir", 36 | dp.project_dir.as_posix()]) 37 | except Exception:  # test failures are irrelevant here; we only assert on the callback logs 38 | pass 39 | 40 | self.assertIn('DBT callback `email_dbt_test_callback` called', str(cm.output)) 41 | self.assertIn('Callback email sent', str(cm.output)) 42 | # self.assertIn('dbt test', str(cm.output)) 43 | 44 | def test_cli_run_models(self): 45 | dp = OpenDbtCli(project_dir=self.DBTCORE_DIR) 46 | dp.invoke(args=['run', "--exclude", "my_failing_dbt_model", "--profiles-dir", dp.project_dir.as_posix()]) 47 | 48 | def test_cli_run_cross_project_ref_models(self): 49 | dpf = OpenDbtCli(project_dir=self.DBTFINANCE_DIR) 50 | dpf.invoke( 51 | args=['run', '--select', '+my_cross_project_ref_model', "--profiles-dir", dpf.project_dir.as_posix()]) 52 | 53 | @unittest.skipIf(semver.Version.parse(BaseDbtTest.DBT_VERSION_STR) < semver.Version.parse("1.8.0"), 'run_info.json requires dbt >= 1.8') 54 | def test_cli_run_result(self): 55 | run_info = self.DBTCORE_DIR.joinpath("target/run_info.json") 56 | if run_info.exists(): 57 | run_info.write_text('') 58 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 59 | dp.run(command="build", args=['--select', 'my_core_table1']) 60 | data = json.loads(run_info.read_text()) 61 | self.assertEqual(1, len(data['nodes'])) 62 | self.assertIn("model.dbtcore.my_core_table1", data['nodes']) 63 | print(json.dumps(data, indent=4)) 64 | 65 | dp.run(command="build", args=['--select', 'my_executesql_dbt_model']) 66 | data = json.loads(run_info.read_text()) 67 | self.assertEqual(2, len(data['nodes'])) 68 | self.assertIn("model.dbtcore.my_executesql_dbt_model", data['nodes']) 69 | print(json.dumps(data, indent=4)) 70 | -------------------------------------------------------------------------------- /tests/test_opendbt_mesh.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from base_dbt_test import BaseDbtTest 4 | from opendbt import OpenDbtProject 5 | 6 | 7 | class TestOpenDbtMesh(BaseDbtTest): 8 | 9 | def test_run_cross_project(self): 10 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR,
profiles_dir=self.DBTCORE_DIR) 11 | dp.run(command="compile") 12 | 13 | dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR) 14 | dp.run(command="compile") 15 | 16 | manifest = json.loads(self.DBTFINANCE_DIR.joinpath("target/manifest.json").read_text()) 17 | model = manifest.get("nodes").get("model.dbtfinance.my_cross_project_ref_model", {}) 18 | print(model) 19 | self.assertEqual(model["database"], 'dev') 20 | self.assertEqual(model['schema'], 'finance') 21 | self.assertEqual(model['name'], 'my_cross_project_ref_model') 22 | 23 | model = manifest.get("nodes").get("model.dbtcore.my_core_table1", {}) 24 | self.assertEqual(model['database'], 'dev') 25 | self.assertEqual(model['schema'], 'core') 26 | self.assertEqual(model['name'], 'my_core_table1') 27 | print(model) 28 | -------------------------------------------------------------------------------- /tests/test_opendbt_project.py: -------------------------------------------------------------------------------- 1 | from base_dbt_test import BaseDbtTest 2 | from opendbt import OpenDbtProject 3 | 4 | 5 | class TestOpenDbtProject(BaseDbtTest): 6 | def test_run_compile(self): 7 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 8 | dp.run(command="compile") 9 | 10 | def test_run_run(self): 11 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 12 | dp.run(command="run", 13 | args=['--select', '+my_second_dbt_model+', "--exclude", "my_failing_dbt_model"], 14 | use_subprocess=True) 15 | 16 | def test_project_attributes(self): 17 | dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR) 18 | self.assertEqual(dp.project.profile_name, "dbtcore") 19 | self.assertEqual(dp.project_vars['dbt_custom_adapter'], 'opendbt.examples.DuckDBAdapterV2Custom') 20 | --------------------------------------------------------------------------------