├── .github └── workflows │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── dbt ├── __init__.py ├── adapters │ ├── __init__.py │ └── dremio │ │ ├── __init__.py │ │ ├── api │ │ ├── basic.py │ │ ├── endpoints.py │ │ ├── error.py │ │ └── query.py │ │ ├── connections.py │ │ ├── impl.py │ │ └── relation.py └── include │ ├── __init__.py │ └── dremio │ ├── __init__.py │ ├── dbt_project.yml │ ├── macros │ ├── adapters │ │ ├── columns.sql │ │ ├── data_preparation.sql │ │ ├── external_query.sql │ │ ├── format.sql │ │ ├── metadata.sql │ │ ├── relation.sql │ │ ├── schema.sql │ │ └── sources.sql │ ├── builtins │ │ └── builtins.sql │ ├── get_custom_name │ │ ├── get_custom_alias.sql │ │ ├── get_custom_database.sql │ │ ├── get_custom_schema.sql │ │ └── is_datalake_node.sql │ └── materializations │ │ ├── helpers.sql │ │ ├── incremental │ │ ├── incremental.sql │ │ ├── strategies.sql │ │ └── validate.sql │ │ ├── reflection │ │ ├── create_reflection.sql │ │ ├── helpers.sql │ │ └── reflection.sql │ │ ├── seed │ │ ├── helpers.sql │ │ └── seed.sql │ │ ├── table │ │ ├── alter_pds.sql │ │ ├── create_table_as.sql │ │ └── table.sql │ │ ├── twin_strategy.sql │ │ └── view │ │ ├── create_or_replace_view.sql │ │ ├── create_view_as.sql │ │ ├── helpers.sql │ │ └── view.sql │ └── sample_profiles.yml └── setup.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | .idea 141 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![dbt-dremio](https://resumo.cloud/wp-content/uploads/2021/07/modelo-imagem-rc-16-1.png)
2 | 
3 | # Hi all dbt-dremio users! Dremio is now the official maintainer of the project! I initiated it two years ago. Meanwhile, dbt and Dremio have become such great products that dbt-dremio deserves much more attention than I can offer it in my spare time. Thank you for your feedback during these two years. Long live open source, long live dbt and Dremio! Here is the new repo: [dbt-dremio](https://github.com/dremio/dbt-dremio)
4 | 
5 | > *This project is developed in my spare time, alongside my lead dev position at [MAIF-VIE](http://www.maif.fr), and aims to provide a competitive alternative to our current ETL stack.*
6 | 
7 | # dbt-dremio
8 | [dbt](https://www.getdbt.com/)'s adapter for [Dremio](https://www.dremio.com/)
9 | 
10 | If you are reading this documentation, I assume you already know both dbt and Dremio well. Please refer to their respective documentation.
11 | 
12 | # Installation
13 | dbt dependencies:
14 | - dbt-core>=1.0.6
15 | - pyodbc>=4.0.27
16 | 
17 | Dremio dependencies:
18 | - the latest Dremio ODBC driver
19 | - Dremio >= 21.0.0
20 | - `dremio.iceberg.enabled`, `dremio.iceberg.ctas.enabled` and `dremio.execution.support_unlimited_splits` enabled
21 | 
22 | OS dependency:
23 | - ODBC (unixodbc-dev on Linux)
24 | 
25 | `pip install dbt-dremio`
26 | 
27 | # Relation types
28 | In dbt's world, a Dremio relation can be either a `view` or a `table`. A Dremio reflection - a dataset materialization with a refresh policy - is mapped to a dbt `materializedview` relation.
29 | 
30 | # Databases
31 | As Dremio is a federation tool, dbt's queries can span locations, so in Dremio's adapter "databases" are paramount.
32 | There are three kinds of dataset locations: external sources, datalakes and spaces. Sources are input locations, datalakes are both input and output locations, and spaces can only contain views, with a few exceptions:
33 | 
34 | location|can create table|can drop table|can create/drop view
35 | -|-|-|-
36 | external source|no|no|no
37 | datalake|if CTAS (`CREATE TABLE AS`) is allowed on this source|if `DROP TABLE` is allowed on this source|no
38 | space|only in the user's home space, and by manually uploading files in the UI|only in the UI|yes
39 | distributed shared storage (`$scratch` source)|yes|yes|no
40 | 
41 | As you can see, through the SQL DDL interface the location type implies the relation type, so materialization implementations do not have to handle possible relation type mutations.
42 | 
43 | The UI allows dots in a space's name: **the adapter does not handle that correctly**.
44 | 
45 | # Schemas
46 | In Dremio, schemas are recursive, like filesystem folders: `dbt.internal."my very strange folder's name"`, and dots are not allowed in sub-folder names. For each database there is a root schema, known to the adapter as `no_schema`. So, in order to materialize a model at the root folder of the `track17` space, one would configure it as:
47 | 
48 |     +database: track17
49 |     +schema: no_schema
50 | 
51 | **Please note that because Dremio has no `CREATE SCHEMA` command yet, all schemas must be created beforehand in the UI or via the API.**
52 | This may change when I replace ODBC with API calls.
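For example, assuming the folders already exist in Dremio, a group of models can be sent to a nested folder by separating sub-folders with dots in `dbt_project.yml` (the project and folder names below are purely illustrative):

    models:
      my_dbt_project:
        staging:
          +database: track17
          +schema: staging.crm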
53 | 
54 | # Rendering of a relation
55 | 
56 | Because Dremio accepts almost any character in object names, the adapter double-quotes each part of the database.schema.identifier triptych, with the following rules concerning the schema:
57 | 
58 | - if the schema is equal to `no_schema`, the schema is not included, leading to a simple `"database"."identifier"` being rendered
59 | - if the schema spans multiple folders, each folder's name is double-quoted, leading to `"database"."folder"."sub-folder"."sub-sub-folder"."identifier"`.
60 | 
61 | # Sources
62 | 
63 | In dbt, a source is a set of read-only datasets, the foundation of the downstream transformation steps toward the datasets that will be exposed to end users.
64 | 
65 | ## Environments
66 | 
67 | A single Dremio installation can host several data environments. In order to group sources by environment, you can use the undocumented `target.profile_name` or the adapter-specific `environment` configuration to map environments between Dremio and dbt:
68 | 
69 | - Dremio's side: prefix all the source names of a given environment (say `prd`) with the environment's name, for example: `prd_crm, prd_hr, prd_accounting`
70 | - dbt's side: prefix each source's database config like this: `{{target.environment}}_crm` or `{{target.profile_name}}_crm`
71 | 
72 | That way you can configure input sources and output `databases/datalakes` separately.
73 | 
74 | # Materializations
75 | 
76 | In dbt, a transformation step is called a **model**, defined by a `SELECT` statement embedded in a jinja2 template. Its `FROM` clause may reference source tables and/or other upstream models. A model is also the dataset resulting from this transformation - in fact, the kind of SQL object it will be materialized in. Will it be a Common Table Expression used in downstream models? A view? A table? Don't worry, just change the `materialized` parameter's value, and dbt will do that for you!
77 | 
78 | ## Dremio's SQL specificities
79 | 
80 | Tables and views cannot coexist in the same database/datalake. So the usual dbt database+schema configuration applies only to views. Seeds, tables, incrementals and even stored tests use a parallel datalake+root_path configuration. This configuration was also added to the profiles.
81 | 
82 | ## Seed
83 | 
84 | A seed can be viewed as a kind of static model; defined by a csv file, it is also a kind of version-controlled source table.
85 | 
86 | adapter's specific configuration|type|required|default
87 | -|-|-|-
88 | datalake|CTAS/DROP TABLE allowed source's name|no|`$scratch`
89 | root_path|the relative path in the datalake|no|`no_schema`
90 | file|don't name the table like the model, use this alias instead|no|
91 | 
92 |     CREATE TABLE AS
93 |     SELECT *
94 |     FROM VALUES()[,()]
95 | 
96 | As the Dremio ODBC bridge does not support query bindings (while Arrow Flight SQL does...), each Python value is converted to a string, quoted, and cast to the column's SQL type.
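For instance, seeds could be grouped under a dedicated path of the default `$scratch` datalake in `dbt_project.yml` (the project name and path are illustrative):

    seeds:
      my_dbt_project:
        +datalake: $scratch
        +root_path: dbt.seeds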
97 | 
98 | ## View
99 | 
100 | adapter's specific configuration|type|required|default
101 | -|-|-|-
102 | database|any space (or home space) root|no|`@user`
103 | schema|relative path in this space|no|`no_schema`
104 | alias|don't name the view like the model, use this alias instead|no|
105 |     CREATE OR REPLACE VIEW AS
106 |     {{ sql }}
107 | 
108 | ## Table
109 | 
110 | adapter's specific configuration|type|required|default
111 | -|-|-|-
112 | datalake|CTAS/DROP TABLE allowed source's name|no|`$scratch`
113 | root_path|the relative path in the datalake|no|`no_schema`
114 | file|don't name the table like the model, use this alias instead|no|
115 | 
116 | 
117 |     CREATE TABLE tblname [ (field1, field2, ...) ]
118 |     [ (STRIPED, HASH, ROUNDROBIN) PARTITION BY (field1, field2, ..) ]
119 |     [ DISTRIBUTE BY (field1, field2, ..) ]
120 |     [ LOCALSORT BY (field1, field2, ..) ]
121 |     [ STORE AS (opt1 => val1, opt2 => val3, ...) ]
122 |     [ WITH SINGLE WRITER ]
123 |     [ AS select_statement ]
124 | 
125 | ## Incremental
126 | 
127 | This is a very interesting materialization. An incremental transformation references not only other models and/or sources, but also itself.
128 | As the `SELECT` statement is embedded in a jinja2 template, it can be written to produce two distinct datasets using the `is_incremental()` macro: one for (re)initialization, and one for the incremental update, based on the current content of the already created dataset. The SQL references the current dataset state through the special `{{ this }}` relation.
129 | 
130 | ### The `append` strategy is available in dbt when `dremio.iceberg.ctas.enabled=yes` in Dremio
131 | 
132 | adapter's specific configuration|type|required|default
133 | -|-|-|-
134 | datalake|CTAS/DROP TABLE allowed source's name|no|`$scratch`
135 | root_path|the relative path in the datalake|no|`no_schema`
136 | incremental_strategy|only `append` for the moment|no|`append`
137 | on_schema_change|`sync_all_columns`, `append_new_columns`, `fail`, `ignore`|no|`ignore`
138 | file|don't name the table like the model, use this alias instead|no|
139 | 
140 | Other strategies will be implemented once Dremio can `INSERT OVERWRITE` or `MERGE`/`UPDATE` into an Iceberg table.
141 | 
142 | ## Reflection
143 | 
144 | A reflection is a materialization of a dataset (its anchor), with a refresh policy handled internally by Dremio. It comes in three different kinds:
145 | - a **raw** reflection acts as a materialized view of all or a subset of an upstream model's columns (usually a view)
146 | - an **aggregate** reflection is much like a Mondrian aggregation table: measures pre-aggregated over a subset of dimension columns
147 | - an **external** reflection simply tells Dremio to use a dataset (the external target) as a possible materialization of another dataset.
148 | 
149 | The `dremio:reflections_enabled` boolean dbt variable can be used to disable reflection management in dbt.
150 | That way, you can still use dbt on top of Dremio Enterprise Edition, even without the admin rights needed to read the `sys.reflections` table.
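For example, reflection management can be switched off for the whole project from `dbt_project.yml`:

    vars:
      dremio:reflections_enabled: false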
151 | 
152 | adapter's specific configuration|reflection type|type|required|default
153 | -|-|-|-|-
154 | anchor|all but external|the anchor model name|only if there is more than one `-- depends_on` clause in the model SQL|
155 | reflection_type|all|`raw`, `aggregate` or `external`|no|`raw`
156 | external_target|external|the underlying target|yes|
157 | display|raw|list of columns|no|all columns
158 | dimensions|aggregate|list of dimension columns|no|all non decimal/float/double columns
159 | dimensions_by_day|aggregate|list of dimension timestamp columns for which only the date part is kept|no|all timestamp columns
160 | measures|aggregate|list of measure columns|no|all decimal/float/double columns
161 | computations|aggregate|list of specific [computations](https://docs.dremio.com/software/sql-reference/sql-commands/acceleration/#aggregate-reflections)|no|`SUM, COUNT` for each measure (in array)
162 | arrow_cache|all but external|does the reflection use Arrow caching?|no|`false`
163 |     ALTER TABLE tblname
164 |     ADD RAW REFLECTION name
165 |     USING
166 |     DISPLAY (field1, field2)
167 |     [ DISTRIBUTE BY (field1, field2, ..) ]
168 |     [ (STRIPED, CONSOLIDATED) PARTITION BY (field1, field2, ..) ]
169 |     [ LOCALSORT BY (field1, field2, ..) ]
170 |     [ ARROW CACHE ]
171 | 
172 |     ALTER TABLE tblname
173 |     ADD AGGREGATE REFLECTION name
174 |     USING
175 |     DIMENSIONS (field1, field2)
176 |     MEASURES (field1, field2)
177 |     [ DISTRIBUTE BY (field1, field2, ..) ]
178 |     [ (STRIPED, CONSOLIDATED) PARTITION BY (field1, field2, ..) ]
179 |     [ LOCALSORT BY (field1, field2, ..) ]
180 |     [ ARROW CACHE ]
181 | 
182 |     ALTER TABLE tblname
183 |     ADD EXTERNAL REFLECTION name
184 |     USING target
185 | 
186 | The model definition will not contain a `SELECT` statement, but simply:
187 | 
188 |     -- depends_on: {{ ref('my_anchor') }}
189 | 
190 | ## Format configuration
191 | 
192 | For persisted models, a format can be specified in their `config` block; for a source table, in its `external` properties block.
193 | 
194 | Seed, table and incremental materializations share the same format configuration:
195 | 
196 | in `config` or `external` blocks|format|type|required|default
197 | -|-|-|-|-
198 | format||`text`, `json`, `arrow`, `parquet`, `iceberg`|no|`iceberg`
199 | field_delimiter|text|field delimiter character|no|
200 | line_delimiter|text|line delimiter character|no|
201 | quote|text|quote character|no|
202 | comment|text|comment character|no|
203 | escape|text|escape character|no|
204 | skip_first_line|text|do not read the first line?|no|
205 | extract_header|text|extract the header?|no|
206 | trim_header|text|trim header column names?|no|
207 | auto_generated_column_names|text|auto-generate column names?|no|
208 | pretty_print|json|write human-readable json?|no|
209 | 
210 | It's all the same for sources, with a few extra configurations:
211 | in `external` block|format|type|required|default
212 | -|-|-|-|-
213 | format||`excel`, `delta` (deltalake)|no|
214 | extract_header|text, excel|extract the header from the first line?|no|
215 | sheet_name|excel|sheet's name in the excel file|no|
216 | xls|excel|is it an old excel (xls) file, not an xlsx one?|no|
217 | has_merged_cells|excel|are there any merged cells?|no|
218 | 
219 | ## Partitioning configuration
220 | 
221 | Any materialization except `view` can be partitioned. Dremio adds as many `dir0, dir1...` columns as needed to let the partitioning scheme show up in the source table or model.
222 | 
223 | `config`|materialization|type|required|default
224 | -|-|-|-|-
225 | partition_method|all but reflection|`striped`, `hash`, `roundrobin`|no|
226 | partition_method|reflection|`striped`, `consolidated`|no|
227 | partition_by|all|partition columns|no|
228 | localsort_by|all|sort columns within a partition|no|
229 | distribute_by|all|distribution columns|no|
230 | single_writer|all but reflection|disable parallel write, incompatible with partition_by|no|
231 | 
232 | ## Twin strategy configuration
233 | 
234 | As tables and views cannot coexist, whether in spaces or datalakes, when a model changes relation type - from view to incremental materialization, for example - we can end up with both a view in a space and a table in a datalake.
235 | 
236 | At model level, dbt can apply a 'twin' strategy:
237 | - **allow** sql object homonyms of different types (relaxed behavior): if a model changes relation type, the previous table or view remains.
238 | - **prevent** sql object homonym creation, dropping the previous relation of a different type if it exists: the previous table or view is dropped.
239 | - **clone** a table relation as a view, in order to have direct access to the model's dataset from the space layer. In that case the view is neither left untouched nor dropped, but its definition is replaced with a straight `select * from {{ the_new_table_relation }}`.
240 | 
241 | `config`|materialization|type|required|default
242 | -|-|-|-|-
243 | twin_strategy|every materialization but reflection|`allow`, `prevent`, `clone`|no|`clone`
244 | 
245 | It should be safe as long as you don't play with the `alias` and/or `file` configs.
246 | 
247 | ## External queries
248 | 
249 | Dremio can send native SQL to external sources; this is known as ["External Queries"](https://docs.dremio.com/software/data-sources/external-queries/).
250 | 
251 | The SQL must not contain any `ref()` or `source()`, only `[schema.]table` paths.
252 | 
253 | At least one `-- depends_on : {{ source(my_source, my_table) }}` must be added to the model's SQL instead. To let source table dependencies show up in the auto-generated documentation, you should indeed mention all the source tables used in the external query.
254 | 
255 | `config`|materialization|type|required|default
256 | -|-|-|-|-
257 | external_query|view, table or incremental|`true` or `false`|no|`false`
258 | 
259 | dbt renders the first source table like this: `{{ source(my_source, my_table).include(schema=false, identifier=false) }}` to obtain the final source database and build the external query call:
260 | 
261 |     select *
262 |     from table("source_name".external_query('sql'))
263 | 
264 | # Connection
265 | 
266 | Be careful to provide the right ODBC driver name in the adapter-specific `driver` attribute: the name you gave to your Dremio ODBC driver installation.
267 | 
268 | Here are the profile default values:
269 | 
270 | configuration | default
271 | -|-
272 | database|@user
273 | schema|no_schema
274 | datalake|$scratch
275 | root_path|no_schema
276 | 
277 | ## Dremio Software
278 | 
279 | With this default configuration, one can start trying dbt on Dremio out of the box, as any Dremio installation should have a user home space and a `$scratch` filesystem.
280 | 
281 |     track17:
282 |       outputs:
283 |         unmanaged:
284 |           type: dremio
285 |           threads: 2
286 |           driver: Dremio ODBC Driver 64-bit
287 |           host: veniseverte.fr
288 |           port: 31010
289 |           user: fabrice_etanchaud
290 |           password: fabricesecretpassword
291 |         managed:
292 |           type: dremio
293 |           threads: 2
294 |           driver: Dremio ODBC Driver 64-bit
295 |           host: veniseprovencale.fr
296 |           port: 31010
297 |           environment: track17
298 |           database: '@dremio'
299 |           schema: no_schema
300 |           datalake: my_s3
301 |           root_path: part.comp.biz
302 |           user: dremio
303 |           password: dremiosecretpassword
304 |       target: unmanaged
305 | 
306 | ## Dremio Cloud
307 | 
308 | For Dremio Cloud, you will have to make an additional configuration change in `dbt_project.yml`.
309 | Add the following to `dbt_project.yml`:
310 | 
311 |     vars:
312 |       dremio:reflections_enabled: false
313 | 
314 | The profile will look similar to the one below:
315 | 
316 |     [project_name]:
317 |       outputs:
318 |         test:
319 |           type: dremio
320 |           threads: 2
321 |           driver: Dremio ODBC Driver 64-bit
322 |           host: sql.dremio.cloud
323 |           port: 443
324 |           database: '@[YOUR_EMAIL]'
325 |           schema: no_schema
326 |           user: $token
327 |           password: [PAT]
328 |           additional_parameters: SSL=1;
329 |       target: test
330 | 
331 | # Behind the scenes
332 | ## How does Dremio "format on read"?
333 | 
334 | Dremio has an interesting feature: it can format a raw dataset "on read" this way:
335 | 
336 |     select *
337 |     from table(
338 |       "datalake"."root_path1"."root_path2"."identifier"
339 |       (type=>'text', fieldDelimiter=>';')
340 |     )
341 | This adapter uses that feature to render a decorated `Relation` for a formatted model or source table: instead of the usual `"datalake"."root_path1"."root_path2"."identifier"`, the `ref()` and `source()` macros are overridden to read the format from the node's `model.config` or `source.external` block and decorate the path given by their `builtins` version.
342 | 
343 | This has a drawback: a formatted source table or a formatted model cannot be a reflection's anchor. You will have to create a proxy view.
344 | 
345 | ## How dbt-dremio handles custom `datalake`/`root_path`
346 | 
347 | The final `database` and `schema` model configurations are a mix of their target and custom values. The rules are defined in the well-known `get_custom_(database|schema)_name` macros.
348 | 
349 | The `datalake` and `root_path` model configurations were introduced to circumvent the segregation Dremio imposes between views and tables, and they fit the same target/custom handling. These macros were adapted to this end.
350 | If needed, please override the `get_custom_(database|schema)_name_impl` macros instead, to keep everything wired.
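As a minimal sketch, such an override could live in your project's `macros` folder. The argument names below are an assumption (mirroring dbt's standard `generate_schema_name(custom_schema_name, node)` signature), and the naming rule is purely hypothetical; check the adapter's own macro definition before relying on it:

    {% macro get_custom_schema_name_impl(custom_schema_name, node) -%}
        {#- hypothetical rule: fall back to the target schema, otherwise trim the custom value -#}
        {%- if custom_schema_name is none -%}
            {{ target.schema }}
        {%- else -%}
            {{ custom_schema_name | trim }}
        {%- endif -%}
    {%- endmacro %}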
351 | -------------------------------------------------------------------------------- /dbt/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /dbt/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.dremio.connections import DremioConnectionManager 2 | from dbt.adapters.dremio.connections import DremioCredentials 3 | from dbt.adapters.dremio.impl import DremioAdapter 4 | 5 | from dbt.adapters.base import AdapterPlugin 6 | from dbt.include import dremio 7 | 8 | 9 | Plugin = AdapterPlugin( 10 | adapter=DremioAdapter, 11 | credentials=DremioCredentials, 12 | include_path=dremio.PACKAGE_PATH) 13 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/api/basic.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright (c) 2019 Ryan Murray. 4 | # 5 | # This file is part of Dremio Client 6 | # (see https://github.com/rymurr/dremio_client). 7 | # 8 | # Licensed to the Apache Software Foundation (ASF) under one 9 | # or more contributor license agreements. See the NOTICE file 10 | # distributed with this work for additional information 11 | # regarding copyright ownership. The ASF licenses this file 12 | # to you under the Apache License, Version 2.0 (the 13 | # "License"); you may not use this file except in compliance 14 | # with the License. You may obtain a copy of the License at 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | # Unless required by applicable law or agreed to in writing, 19 | # software distributed under the License is distributed on an 20 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 | # KIND, either express or implied. See the License for the 22 | # specific language governing permissions and limitations 23 | # under the License. 24 | # 25 | import requests 26 | 27 | 28 | def login(base_url, username, password, timeout=10, verify=True): 29 | """ 30 | Log into dremio using basic auth 31 | :param base_url: Dremio url 32 | :param username: username 33 | :param password: password 34 | :param timeout: optional timeout 35 | :param verify: If false ignore ssl errors 36 | :return: auth token 37 | """ 38 | url = base_url + "/apiv2/login" 39 | 40 | r = requests.post(url, json={"userName": username, "password": password}, timeout=timeout, verify=verify) 41 | r.raise_for_status() 42 | return r.json()["token"] 43 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/api/endpoints.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright (c) 2019 Ryan Murray. 4 | # 5 | # This file is part of Dremio Client 6 | # (see https://github.com/rymurr/dremio_client). 7 | # 8 | # Licensed to the Apache Software Foundation (ASF) under one 9 | # or more contributor license agreements. 
See the NOTICE file
10 | # distributed with this work for additional information
11 | # regarding copyright ownership. The ASF licenses this file
12 | # to you under the Apache License, Version 2.0 (the
13 | # "License"); you may not use this file except in compliance
14 | # with the License. You may obtain a copy of the License at
15 | #
16 | # http://www.apache.org/licenses/LICENSE-2.0
17 | #
18 | # Unless required by applicable law or agreed to in writing,
19 | # software distributed under the License is distributed on an
20 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 | # KIND, either express or implied. See the License for the
22 | # specific language governing permissions and limitations
23 | # under the License.
24 | #
25 | import requests
26 | import json as jsonlib
27 | from requests.exceptions import HTTPError
28 | from six.moves.urllib.parse import quote
29 | 
30 | from .error import (
31 |     DremioBadRequestException,
32 |     DremioException,
33 |     DremioNotFoundException,
34 |     DremioPermissionException,
35 |     DremioUnauthorizedException,
36 |     DremioAlreadyExistsException
37 | )
38 | 
39 | def _get_headers(token):
40 |     headers = {"Authorization": "_dremio{}".format(token), "content-type": "application/json"}
41 |     return headers
42 | 
43 | def _get(url, token, details="", ssl_verify=True):
44 |     r = requests.get(url, headers=_get_headers(token), verify=ssl_verify)
45 |     return _check_error(r, details)
46 | 
47 | def _post(url, token, json=None, details="", ssl_verify=True):
48 |     if isinstance(json, str):
49 |         json = jsonlib.loads(json)
50 |     r = requests.post(url, headers=_get_headers(token), verify=ssl_verify, json=json)
51 |     return _check_error(r, details)
52 | 
53 | def _raise_for_status(self):
54 |     """Raises stored :class:`HTTPError`, if one occurred.
Copy from requests request.raise_for_status()""" 55 | 56 | http_error_msg = "" 57 | if isinstance(self.reason, bytes): 58 | try: 59 | reason = self.reason.decode("utf-8") 60 | except UnicodeDecodeError: 61 | reason = self.reason.decode("iso-8859-1") 62 | else: 63 | reason = self.reason 64 | 65 | if 400 <= self.status_code < 500: 66 | http_error_msg = u"%s Client Error: %s for url: %s" % (self.status_code, reason, self.url) 67 | 68 | elif 500 <= self.status_code < 600: 69 | http_error_msg = u"%s Server Error: %s for url: %s" % (self.status_code, reason, self.url) 70 | 71 | if http_error_msg: 72 | return HTTPError(http_error_msg, response=self), self.status_code, reason 73 | else: 74 | return None, self.status_code, reason 75 | 76 | def _check_error(r, details=""): 77 | error, code, _ = _raise_for_status(r) 78 | if not error: 79 | try: 80 | data = r.json() 81 | return data 82 | except: # NOQA 83 | return r.text 84 | if code == 400: 85 | raise DremioBadRequestException("Bad request:" + details, error, r) 86 | if code == 401: 87 | raise DremioUnauthorizedException("Unauthorized:" + details, error, r) 88 | if code == 403: 89 | raise DremioPermissionException("No permission:" + details, error, r) 90 | if code == 404: 91 | raise DremioNotFoundException("Not found:" + details, error, r) 92 | if code == 409: 93 | raise DremioAlreadyExistsException("Already exists:" + details, error, r) 94 | raise DremioException("Unknown error", error) 95 | 96 | def catalog_item(token, base_url, cid=None, path=None, ssl_verify=True): 97 | """fetch a specific catalog item by id or by path 98 | 99 | https://docs.dremio.com/rest-api/catalog/get-catalog-id.html 100 | https://docs.dremio.com/rest-api/catalog/get-catalog-path.html 101 | 102 | :param token: auth token from previous login attempt 103 | :param base_url: base Dremio url 104 | :param cid: unique dremio id for resource 105 | :param path: list ['space', 'folder', 'vds'] 106 | :param ssl_verify: ignore ssl errors if False 107 | :return: json of resource 108 | """ 109 | if cid is None and path is None: 110 | raise TypeError("both id and path can't be None for a catalog_item call") 111 | idpath = (cid if cid else "") + ", " + (".".join(path) if path else "") 112 | cpath = [quote(i, safe="") for i in path] if path else "" 113 | endpoint = "/{}".format(cid) if cid else "/by-path/{}".format("/".join(cpath).replace('"', "")) 114 | return _get(base_url + "/api/v3/catalog{}".format(endpoint), token, idpath, ssl_verify=ssl_verify) 115 | 116 | def sql(token, base_url, query, context=None, ssl_verify=True): 117 | """submit job w/ given sql 118 | 119 | https://docs.dremio.com/rest-api/sql/post-sql.html 120 | 121 | :param token: auth token 122 | :param base_url: base Dremio url 123 | :param query: sql query 124 | :param context: optional dremio context 125 | :param ssl_verify: ignore ssl errors if False 126 | :return: job id json object 127 | """ 128 | return _post(base_url + "/api/v3/sql", token, ssl_verify=ssl_verify, json={"sql": query, "context": context}) 129 | 130 | def job_status(token, base_url, job_id, ssl_verify=True): 131 | """fetch job status 132 | 133 | https://docs.dremio.com/rest-api/jobs/get-job.html 134 | 135 | :param token: auth token 136 | :param base_url: sql query 137 | :param job_id: job id (as returned by sql) 138 | :param ssl_verify: ignore ssl errors if False 139 | :return: status object 140 | """ 141 | return _get(base_url + "/api/v3/job/{}".format(job_id), token, ssl_verify=ssl_verify) 142 | 143 | def job_results(token, base_url, job_id, offset=0, 
limit=100, ssl_verify=True): 144 | """fetch job results 145 | 146 | https://docs.dremio.com/rest-api/jobs/get-job.html 147 | 148 | :param token: auth token 149 | :param base_url: sql query 150 | :param job_id: job id (as returned by sql) 151 | :param offset: offset of result set to return 152 | :param limit: number of results to return (max 500) 153 | :param ssl_verify: ignore ssl errors if False 154 | :return: result object 155 | """ 156 | return _get( 157 | base_url + "/api/v3/job/{}/results?offset={}&limit={}".format(job_id, offset, limit), 158 | token, 159 | ssl_verify=ssl_verify, 160 | ) 161 | 162 | def delete_catalog(token, base_url, cid, tag, ssl_verify=True): 163 | """ remove a catalog item from Dremio 164 | 165 | https://docs.dremio.com/rest-api/catalog/delete-catalog-id.html 166 | 167 | :param token: auth token 168 | :param base_url: sql query 169 | :param cid: id of a catalog entity 170 | :param tag: version tag of entity 171 | :param ssl_verify: ignore ssl errors if False 172 | :return: None 173 | """ 174 | if tag is None: 175 | return _delete(base_url + "/api/v3/catalog/{}".format(cid), token, ssl_verify=ssl_verify) 176 | else: 177 | return _delete(base_url + "/api/v3/catalog/{}?tag={}".format(cid, tag), token, ssl_verify=ssl_verify) 178 | 179 | 180 | def set_catalog(token, base_url, json, ssl_verify=True): 181 | """ add a new catalog entity 182 | 183 | https://docs.dremio.com/rest-api/catalog/post-catalog.html 184 | 185 | :param token: auth token 186 | :param base_url: sql query 187 | :param json: json document for new catalog entity 188 | :param ssl_verify: ignore ssl errors if False 189 | :return: new catalog entity 190 | """ 191 | return _post(base_url + "/api/v3/catalog", token, json, ssl_verify=ssl_verify) 192 | 193 | 194 | def update_catalog(token, base_url, cid, json, ssl_verify=True): 195 | """ update a catalog entity 196 | 197 | https://docs.dremio.com/rest-api/catalog/put-catalog-id.html 198 | 199 | :param token: auth token 200 | :param base_url: sql query 201 | :param cid: id of catalog entity 202 | :param json: json document for new catalog entity 203 | :param ssl_verify: ignore ssl errors if False 204 | :return: updated catalog entity 205 | """ 206 | return _put(base_url + "/api/v3/catalog/{}".format(cid), token, json, ssl_verify=ssl_verify) 207 | 208 | 209 | def promote_catalog(token, base_url, cid, json, ssl_verify=True): 210 | """ promote a catalog entity (only works on folders and files in sources 211 | 212 | https://docs.dremio.com/rest-api/catalog/post-catalog-id.html 213 | 214 | :param token: auth token 215 | :param base_url: sql query 216 | :param cid: id of catalog entity 217 | :param json: json document for new catalog entity 218 | :param ssl_verify: ignore ssl errors if False 219 | :return: updated catalog entity 220 | """ 221 | return _post(base_url + "/api/v3/catalog/{}".format(cid), token, json, ssl_verify=ssl_verify) 222 | 223 | def collaboration_tags(token, base_url, cid, ssl_verify=True): 224 | """fetch tags for a catalog entry 225 | 226 | https://docs.dremio.com/rest-api/user/get-catalog-collaboration.html 227 | 228 | :param token: auth token 229 | :param base_url: sql query 230 | :param cid: id of a catalog entity 231 | :param ssl_verify: ignore ssl errors if False 232 | :return: result object 233 | """ 234 | return _get(base_url + "/api/v3/catalog/{}/collaboration/tag".format(cid), token, ssl_verify=ssl_verify) 235 | 236 | def collaboration_wiki(token, base_url, cid, ssl_verify=True): 237 | """fetch wiki for a catalog entry 238 | 239 | 
https://docs.dremio.com/rest-api/user/get-catalog-collaboration.html 240 | 241 | :param token: auth token 242 | :param base_url: sql query 243 | :param cid: id of a catalog entity 244 | :param ssl_verify: ignore ssl errors if False 245 | :return: result object 246 | """ 247 | return _get(base_url + "/api/v3/catalog/{}/collaboration/wiki".format(cid), token, ssl_verify=ssl_verify) 248 | 249 | def set_collaboration_tags(token, base_url, cid, tags, ssl_verify=True): 250 | """ set tags on a given catalog entity 251 | 252 | https://docs.dremio.com/rest-api/catalog/post-catalog-collaboration.html 253 | 254 | :param token: auth token 255 | :param base_url: sql query 256 | :param cid: id of a catalog entity 257 | :param tags: list of strings for tags 258 | :param ssl_verify: ignore ssl errors if False 259 | :return: None 260 | """ 261 | json = {"tags": tags} 262 | try: 263 | old_tags = collaboration_tags(token, base_url, cid, ssl_verify) 264 | json["version"] = old_tags["version"] 265 | except: # NOQA 266 | pass 267 | return _post(base_url + "/api/v3/catalog/{}/collaboration/tag".format(cid), token, ssl_verify=ssl_verify, json=json) 268 | 269 | def set_collaboration_wiki(token, base_url, cid, wiki, ssl_verify=True): 270 | """ set wiki on a given catalog entity 271 | 272 | https://docs.dremio.com/rest-api/catalog/post-catalog-collaboration.html 273 | 274 | :param token: auth token 275 | :param base_url: sql query 276 | :param cid: id of a catalog entity 277 | :param wiki: text representing markdown for entity 278 | :param ssl_verify: ignore ssl errors if False 279 | :return: None 280 | """ 281 | json = {"text": wiki} 282 | try: 283 | old_wiki = collaboration_wiki(token, base_url, cid, ssl_verify) 284 | json["version"] = old_wiki["version"] 285 | except: # NOQA 286 | pass 287 | return _post( 288 | base_url + "/api/v3/catalog/{}/collaboration/wiki".format(cid), token, ssl_verify=ssl_verify, json=json 289 | ) 290 | 291 | def build_url(**kwargs): 292 | """ 293 | returns required url string 294 | :param kwargs: keyword arguments (dictionary) 295 | :return:string 296 | """ 297 | query = "&".join("{}={}".format(k,v) for k,v in kwargs.items() if v) 298 | if query: 299 | qry= "?{}".format(query) 300 | return qry 301 | return query 302 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/api/error.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright (c) 2019 Ryan Murray. 4 | # 5 | # This file is part of Dremio Client 6 | # (see https://github.com/rymurr/dremio_client). 7 | # 8 | # Licensed to the Apache Software Foundation (ASF) under one 9 | # or more contributor license agreements. See the NOTICE file 10 | # distributed with this work for additional information 11 | # regarding copyright ownership. The ASF licenses this file 12 | # to you under the Apache License, Version 2.0 (the 13 | # "License"); you may not use this file except in compliance 14 | # with the License. You may obtain a copy of the License at 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | # Unless required by applicable law or agreed to in writing, 19 | # software distributed under the License is distributed on an 20 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 | # KIND, either express or implied. See the License for the 22 | # specific language governing permissions and limitations 23 | # under the License. 
24 | # 25 | 26 | 27 | class DremioException(Exception): 28 | """ 29 | base dremio exception 30 | """ 31 | """ 32 | { 33 | "errorMessage": "brief error message", 34 | "moreInfo": "detailed error message" 35 | } 36 | """ 37 | 38 | def __init__(self, msg, original_exception, response=None): 39 | super(DremioException, self).__init__(msg + (": %s" % original_exception)) 40 | self.original_exception = original_exception 41 | self.response = response 42 | 43 | 44 | class DremioUnauthorizedException(DremioException): 45 | pass 46 | 47 | class DremioPermissionException(DremioException): 48 | pass 49 | 50 | class DremioNotFoundException(DremioException): 51 | pass 52 | 53 | class DremioBadRequestException(DremioException): 54 | pass 55 | 56 | class DremioAlreadyExistsException(DremioException): 57 | pass 58 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/api/query.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright (c) 2019 Ryan Murray. 4 | # 5 | # This file is part of Dremio Client 6 | # (see https://github.com/rymurr/dremio_client). 7 | # 8 | # Licensed to the Apache Software Foundation (ASF) under one 9 | # or more contributor license agreements. See the NOTICE file 10 | # distributed with this work for additional information 11 | # regarding copyright ownership. The ASF licenses this file 12 | # to you under the Apache License, Version 2.0 (the 13 | # "License"); you may not use this file except in compliance 14 | # with the License. You may obtain a copy of the License at 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | # Unless required by applicable law or agreed to in writing, 19 | # software distributed under the License is distributed on an 20 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 | # KIND, either express or implied. See the License for the 22 | # specific language governing permissions and limitations 23 | # under the License. 
24 | #
25 | import time
26 | from concurrent.futures.thread import ThreadPoolExecutor
27 | 
28 | from .error import DremioException
29 | from .endpoints import job_results, job_status, sql
30 | 
31 | 
32 | executor = ThreadPoolExecutor(max_workers=8)
33 | 
34 | _job_states = {
35 |     "NOT_SUBMITTED",
36 |     "STARTING",
37 |     "RUNNING",
38 |     "COMPLETED",
39 |     "CANCELED",
40 |     "FAILED",
41 |     "CANCELLATION_REQUESTED",
42 |     "ENQUEUED",
43 | }
44 | _done_job_states = {"COMPLETED", "CANCELED", "FAILED"}
45 | 
46 | 
47 | def run(token, base_url, query, context=None, sleep_time=10, ssl_verify=True):
48 |     """ Run a single sql query
49 | 
50 |     This runs a single sql query against the rest api and returns a json document of the results
51 | 
52 |     :param token: API token from auth
53 |     :param base_url: base url of Dremio instance
54 |     :param query: valid sql query
55 |     :param context: optional context in which to execute the query
56 |     :param sleep_time: seconds to sleep between checking for finished state
57 |     :param ssl_verify: verify ssl on web requests
58 |     :raise: DremioException if job failed
59 |     :raise: DremioUnauthorizedException if token is incorrect or invalid
60 |     :return: json array of result rows
61 | 
62 |     :example:
63 | 
64 |     >>> run('abc', 'http://localhost:9047', 'select * from sys.options')
65 |     [{'record':'1'}, {'record':'2'}]
66 |     """
67 |     assert sleep_time > 0
68 |     job = sql(token, base_url, query, context, ssl_verify=ssl_verify)
69 |     job_id = job["id"]
70 |     while True:
71 |         state = job_status(token, base_url, job_id, ssl_verify=ssl_verify)
72 |         if state["jobState"] == "COMPLETED":
73 |             row_count = state.get("rowCount", 0)
74 |             break
75 |         if state["jobState"] in {"CANCELED", "FAILED"}:
76 |             # TODO: add info about why the job failed
77 |             raise DremioException("job failed " + str(state), None)
78 |         time.sleep(sleep_time)
79 |     count = 0
80 |     while count < row_count:
81 |         result = job_results(token, base_url, job_id, count, ssl_verify=ssl_verify)
82 |         count += 100
83 |         yield result
84 | 
85 | 
86 | def run_async(token, base_url, query, context=None, sleep_time=10, ssl_verify=True):
87 |     """ Run a single sql query asynchronously
88 | 
89 |     This executes a single sql query against the rest api asynchronously and returns a future for the result
90 | 
91 |     :param token: API token from auth
92 |     :param base_url: base url of Dremio instance
93 |     :param query: valid sql query
94 |     :param context: optional context in which to execute the query
95 |     :param sleep_time: seconds to sleep between checking for finished state
96 |     :param ssl_verify: verify ssl on web requests
97 |     :raise: DremioException if job failed
98 |     :raise: DremioUnauthorizedException if token is incorrect or invalid
99 |     :return: concurrent.futures.Future for the result
100 | 
101 |     :example:
102 | 
103 |     >>> f = run_async('abc', 'http://localhost:9047', 'select * from sys.options')
104 |     >>> f.result()
105 |     [{'record':'1'}, {'record':'2'}]
106 |     """
107 |     return executor.submit(run, token, base_url, query, context, sleep_time, ssl_verify)
108 | 
--------------------------------------------------------------------------------
/dbt/adapters/dremio/connections.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | 
3 | import pyodbc
4 | import time
5 | 
6 | import dbt.exceptions
7 | from dbt.adapters.base import Credentials
8 | from dbt.adapters.sql import SQLConnectionManager
9 | from dbt.adapters.dremio.relation import DremioRelation
10 | from dbt.contracts.connection import
AdapterResponse 11 | #from dbt.logger import GLOBAL_LOGGER as logger 12 | from dbt.events import AdapterLogger 13 | logger = AdapterLogger("dremio") 14 | 15 | from dataclasses import dataclass 16 | from typing import Optional, Union, Any 17 | 18 | from typing import Tuple, Union 19 | import agate 20 | 21 | @dataclass 22 | class DremioCredentials(Credentials): 23 | driver: str 24 | host: str 25 | UID: str 26 | PWD: str 27 | environment: Optional[str] 28 | database: Optional[str] 29 | schema: Optional[str] 30 | datalake: Optional[str] 31 | root_path: Optional[str] 32 | port: Optional[int] = 31010 33 | additional_parameters: Optional[str] = None 34 | 35 | _ALIASES = { 36 | 'user': 'UID' 37 | , 'username': 'UID' 38 | , 'pass': 'PWD' 39 | , 'password': 'PWD' 40 | , 'server': 'host' 41 | , 'track': 'environment' 42 | , 'space': 'database' 43 | , 'folder': 'schema' 44 | , 'materialization_database' : 'datalake' 45 | , 'materialization_schema' : 'root_path' 46 | } 47 | 48 | @property 49 | def type(self): 50 | return 'dremio' 51 | 52 | @property 53 | def unique_field(self): 54 | return self.host 55 | 56 | def _connection_keys(self): 57 | # return an iterator of keys to pretty-print in 'dbt debug' 58 | # raise NotImplementedError 59 | return 'driver', 'host', 'port', 'UID', 'database', 'schema', 'additional_parameters', 'datalake', 'root_path', 'environment' 60 | 61 | @classmethod 62 | def __pre_deserialize__(cls, data): 63 | data = super().__pre_deserialize__(data) 64 | if 'database' not in data: 65 | data['database'] = None 66 | if 'schema' not in data: 67 | data['schema'] = None 68 | if 'datalake' not in data: 69 | data['datalake'] = None 70 | if 'root_path' not in data: 71 | data['root_path'] = None 72 | if 'environment' not in data: 73 | data['environment'] = None 74 | return data 75 | 76 | def __post_init__(self): 77 | if self.database is None: 78 | self.database = '@' + self.UID 79 | if self.schema is None: 80 | self.schema = DremioRelation.no_schema 81 | if self.datalake is None: 82 | self.datalake = '$scratch' 83 | if self.root_path is None: 84 | self.root_path = DremioRelation.no_schema 85 | 86 | class DremioConnectionManager(SQLConnectionManager): 87 | TYPE = 'dremio' 88 | 89 | @contextmanager 90 | def exception_handler(self, sql): 91 | try: 92 | yield 93 | 94 | except pyodbc.DatabaseError as e: 95 | logger.debug('Database error: {}'.format(str(e))) 96 | 97 | try: 98 | # attempt to release the connection 99 | self.release() 100 | except pyodbc.Error: 101 | logger.debug("Failed to release connection!") 102 | pass 103 | 104 | raise dbt.exceptions.DatabaseException(str(e).strip()) from e 105 | 106 | except Exception as e: 107 | logger.debug(f"Error running SQL: {sql}") 108 | logger.debug("Rolling back transaction.") 109 | self.release() 110 | if isinstance(e, dbt.exceptions.RuntimeException): 111 | # during a sql query, an internal to dbt exception was raised. 112 | # this sounds a lot like a signal handler and probably has 113 | # useful information, so raise it without modification. 
114 | raise 115 | 116 | raise dbt.exceptions.RuntimeException(e) 117 | 118 | @classmethod 119 | def open(cls, connection): 120 | 121 | if connection.state == 'open': 122 | logger.debug('Connection is already open, skipping open.') 123 | return connection 124 | 125 | credentials = connection.credentials 126 | 127 | try: 128 | con_str = ["ConnectionType=Direct", "AuthenticationType=Plain", "QueryTimeout=600"] 129 | con_str.append(f"Driver={{{credentials.driver}}}") 130 | con_str.append(f"HOST={credentials.host}") 131 | con_str.append(f"PORT={credentials.port}") 132 | con_str.append(f"UID={credentials.UID}") 133 | con_str.append(f"PWD={credentials.PWD}") 134 | if credentials.additional_parameters: 135 | con_str.append(f"{credentials.additional_parameters}") 136 | con_str_concat = ';'.join(con_str) 137 | logger.debug(f'Using connection string: {con_str_concat}') 138 | 139 | handle = pyodbc.connect(con_str_concat, autocommit=True) 140 | 141 | connection.state = 'open' 142 | connection.handle = handle 143 | logger.debug(f'Connected to db: {credentials.database}') 144 | 145 | except pyodbc.Error as e: 146 | logger.debug(f"Could not connect to db: {e}") 147 | 148 | connection.handle = None 149 | connection.state = 'fail' 150 | 151 | raise dbt.exceptions.FailedToConnectException(str(e)) 152 | 153 | return connection 154 | 155 | @classmethod 156 | def is_cancelable(cls) -> bool: 157 | return False 158 | 159 | def cancel(self, connection): 160 | pass 161 | 162 | def commit(self, *args, **kwargs): 163 | pass 164 | 165 | def rollback(self, *args, **kwargs): 166 | pass 167 | 168 | def add_begin_query(self): 169 | # return self.add_query('BEGIN TRANSACTION', auto_begin=False) 170 | pass 171 | 172 | def add_commit_query(self): 173 | # return self.add_query('COMMIT TRANSACTION', auto_begin=False) 174 | pass 175 | 176 | def add_query(self, sql, auto_begin=True, bindings=None, 177 | abridge_sql_log=False): 178 | 179 | connection = self.get_thread_connection() 180 | 181 | if auto_begin and connection.transaction_open is False: 182 | self.begin() 183 | 184 | logger.debug('Using {} connection "{}".' 185 | .format(self.TYPE, connection.name)) 186 | 187 | with self.exception_handler(sql): 188 | if abridge_sql_log: 189 | logger.debug('On {}: {}....'.format( 190 | connection.name, sql[0:512])) 191 | else: 192 | logger.debug('On {}: {}'.format(connection.name, sql)) 193 | pre = time.time() 194 | 195 | cursor = connection.handle.cursor() 196 | 197 | # pyodbc does not handle a None type binding! 
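             # dbt may pass bindings=None to mean "no parameters"; pyodbc does not
             # accept None as a parameter set, so in that case execute() is called
             # with the SQL only and bindings are forwarded solely when provided.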
198 | if bindings is None: 199 | cursor.execute(sql) 200 | else: 201 | cursor.execute(sql, bindings) 202 | 203 | logger.debug("SQL status: {} in {:0.2f} seconds".format( 204 | self.get_response(cursor), (time.time() - pre))) 205 | 206 | return connection, cursor 207 | 208 | @classmethod 209 | def get_credentials(cls, credentials): 210 | return credentials 211 | 212 | @classmethod 213 | def get_response(cls, cursor: pyodbc.Cursor) -> AdapterResponse: 214 | rows = cursor.rowcount 215 | message = 'OK' if rows == -1 else str(rows) 216 | return AdapterResponse( 217 | _message=message, 218 | rows_affected=rows 219 | ) 220 | 221 | def execute( 222 | self, sql: str, auto_begin: bool = False, fetch: bool = False 223 | ) -> Tuple[AdapterResponse, agate.Table]: 224 | sql = self._add_query_comment(sql) 225 | _, cursor = self.add_query(sql, auto_begin) 226 | response = self.get_response(cursor) 227 | fetch = True 228 | if fetch: 229 | table = self.get_result_from_cursor(cursor) 230 | else: 231 | table = dbt.clients.agate_helper.empty_table() 232 | cursor.close() 233 | return response, table 234 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/impl.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.sql import SQLAdapter 2 | from dbt.adapters.dremio import DremioConnectionManager 3 | from dbt.adapters.dremio.relation import DremioRelation 4 | 5 | from typing import List 6 | from typing import Optional 7 | import dbt.flags 8 | from dbt.adapters.base.relation import BaseRelation 9 | #from dbt.logger import GLOBAL_LOGGER as logger 10 | from dbt.events import AdapterLogger 11 | logger = AdapterLogger("dremio") 12 | 13 | from dbt.adapters.base.meta import available 14 | 15 | import agate 16 | 17 | class DremioAdapter(SQLAdapter): 18 | ConnectionManager = DremioConnectionManager 19 | Relation = DremioRelation 20 | 21 | @classmethod 22 | def date_function(cls): 23 | return 'current_date' 24 | 25 | @classmethod 26 | def convert_text_type(cls, agate_table, col_idx): 27 | return "varchar" 28 | 29 | @classmethod 30 | def convert_datetime_type(cls, agate_table, col_idx): 31 | return "timestamp" 32 | 33 | @classmethod 34 | def convert_date_type(cls, agate_table, col_idx): 35 | return "date" 36 | 37 | @classmethod 38 | def convert_boolean_type(cls, agate_table, col_idx): 39 | return "boolean" 40 | 41 | @classmethod 42 | def convert_number_type(cls, agate_table, col_idx): 43 | decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) 44 | return "decimal" if decimals else "bigint" 45 | 46 | @classmethod 47 | def convert_time_type(cls, agate_table, col_idx): 48 | return "time" 49 | -------------------------------------------------------------------------------- /dbt/adapters/dremio/relation.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from dbt.adapters.base.relation import BaseRelation, Policy, ComponentName, RelationType 3 | from typing import Optional, Tuple, Iterator 4 | 5 | @dataclass 6 | class DremioQuotePolicy(Policy): 7 | database: bool = True 8 | schema: bool = True 9 | identifier: bool = True 10 | 11 | @dataclass 12 | class DremioIncludePolicy(Policy): 13 | database: bool = True 14 | schema: bool = True 15 | identifier: bool = True 16 | 17 | @dataclass(frozen=True, eq=False, repr=False) 18 | class DremioRelation(BaseRelation): 19 | quote_policy: DremioQuotePolicy = DremioQuotePolicy() 20 | include_policy: 
DremioIncludePolicy = DremioIncludePolicy() 21 | no_schema = 'no_schema' 22 | format: Optional[str] = None 23 | format_clause: Optional[str] = None 24 | 25 | def __post_init__(self): 26 | if self.path.schema is None: 27 | self.path.schema = DremioRelation.no_schema 28 | 29 | def _render_iterator( 30 | self 31 | ) -> Iterator[Tuple[Optional[ComponentName], Optional[str]]]: 32 | 33 | for key in ComponentName: 34 | path_part: Optional[str] = None 35 | if self.include_policy.get_part(key): 36 | tmp_path_part = self.path.get_part(key) 37 | if not (key == ComponentName.Schema and tmp_path_part == self.no_schema): 38 | path_part = tmp_path_part 39 | if path_part is not None and (self.quote_policy.get_part(key)): # or key == ComponentName.Schema): 40 | path_part = self.quoted_by_component(path_part, key) 41 | yield key, path_part 42 | 43 | def quoted_by_component(self, identifier, componentName): 44 | if componentName == ComponentName.Schema: 45 | return '.'.join( 46 | self.quoted(folder) for folder in identifier.split('.') 47 | ) 48 | else: 49 | return self.quoted(identifier) 50 | 51 | def render(self) -> str: 52 | rendered = super().render() 53 | if self.format is not None and self.format_clause is not None: 54 | rendered = "".join (("table( ", rendered, " ( ", self.format_clause, " ) )")) 55 | return rendered 56 | -------------------------------------------------------------------------------- /dbt/include/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /dbt/include/dremio/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | PACKAGE_PATH = os.path.dirname(__file__) 3 | -------------------------------------------------------------------------------- /dbt/include/dremio/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: dbt_dremio 3 | version: 1.0 4 | config-version: 2 5 | 6 | quoting: 7 | database: true 8 | schema: true 9 | identifier: true 10 | 11 | macro-paths: ["macros"] 12 | 13 | vars: 14 | 'dremio:reflections_enabled': true 15 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/columns.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__get_columns_in_relation(relation) -%} 2 | 3 | {%- set database = relation.database.strip('"') -%} 4 | {%- set schema = relation.schema.strip('"') -%} 5 | {%- set identifier = relation.identifier.strip('"') -%} 6 | {%- set schema_name = database 7 | + (('.' 
+ schema) if schema != 'no_schema' else '') -%} 8 | {% call statement('get_columns_in_relation', fetch_result=True) %} 9 | select column_name as column_name 10 | ,lower(data_type) as data_type 11 | ,character_maximum_length 12 | ,numeric_precision 13 | ,numeric_scale 14 | from information_schema.columns 15 | where ilike(table_schema, '{{ schema_name }}') 16 | and ilike(table_name, '{{ identifier }}') 17 | order by ordinal_position 18 | {% endcall %} 19 | {% set table = load_result('get_columns_in_relation').table %} 20 | {{ return(sql_convert_columns_in_relation(table)) }} 21 | 22 | {% endmacro %} 23 | 24 | {% macro dremio__alter_column_type(relation, column_name, new_column_type) -%} 25 | 26 | {% call statement('alter_column_type') %} 27 | alter table {{ relation }} alter column {{ adapter.quote(column_name) }} {{ adapter.quote(column_name) }} {{ new_column_type }} 28 | {% endcall %} 29 | 30 | {% endmacro %} 31 | 32 | {% macro dremio__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} 33 | 34 | {% if add_columns is none %} 35 | {% set add_columns = [] %} 36 | {% endif %} 37 | {% if remove_columns is none %} 38 | {% set remove_columns = [] %} 39 | {% endif %} 40 | 41 | {% if add_columns | length > 0 %} 42 | {% set sql -%} 43 | alter {{ relation.type }} {{ relation }} add columns ( 44 | 45 | {% for column in add_columns %} 46 | {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }} 47 | {% endfor %} 48 | ) 49 | {%- endset -%} 50 | {% do run_query(sql) %} 51 | {% endif %} 52 | 53 | {% if remove_columns | length > 0 %} 54 | {% for column in remove_columns %} 55 | {% set sql -%} 56 | alter {{ relation.type }} {{ relation }} drop column {{ column.name }} 57 | {%- endset -%} 58 | {% do run_query(sql) %} 59 | {% endfor %} 60 | {% endif %} 61 | 62 | {% endmacro %} 63 | 64 | {% macro intersect_columns(source_columns, target_columns) %} 65 | 66 | {% set result = [] %} 67 | {% set target_names = target_columns | map(attribute = 'column') | list %} 68 | 69 | {# --check whether the name attribute exists in the target - this does not perform a data type check #} 70 | {% for sc in source_columns %} 71 | {% if sc.name in target_names %} 72 | {{ result.append(sc) }} 73 | {% endif %} 74 | {% endfor %} 75 | 76 | {{ return(result) }} 77 | 78 | {% endmacro %} 79 | 80 | {% macro get_quoted_csv(column_names, table_alias=none) %} 81 | 82 | {% set quoted = [] %} 83 | {% for col in column_names -%} 84 | {%- do quoted.append((adapter.quote(table_alias) ~ '.' 
if table_alias is not none else '') ~ adapter.quote(col)) -%} 85 | {%- endfor %} 86 | 87 | {%- set dest_cols_csv = quoted | join(', ') -%} 88 | {{ return(dest_cols_csv) }} 89 | 90 | {% endmacro %} 91 | 92 | {% macro diff_columns(source_columns, target_columns) %} 93 | 94 | {% set result = [] %} 95 | {% set target_names = target_columns | map(attribute = 'column') | list %} 96 | 97 | {# --check whether the name attribute exists in the target - this does not perform a data type check #} 98 | {% for sc in source_columns %} 99 | {% if sc.name not in target_names %} 100 | {{ result.append(sc) }} 101 | {% endif %} 102 | {% endfor %} 103 | 104 | {{ return(result) }} 105 | 106 | {% endmacro %} 107 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/data_preparation.sql: -------------------------------------------------------------------------------- 1 | {%- macro trim_varchar_columns(column_names, trimtext=none) -%} 2 | {%- if column_names is string -%} 3 | {%- set column_names = [column_names] -%} 4 | {% endif -%} 5 | {%- set result = [] -%} 6 | {%- for column_name in column_names -%} 7 | {%- set sql -%} 8 | nullif(btrim({{ adapter.quote(column_name) }} 9 | {%- if trimtext -%}, '{{ trimtext }}'{%- endif -%} 10 | ), '') 11 | {%- endset -%} 12 | {%- do result.append( 13 | { 14 | 'name': column_name 15 | ,'sql': sql 16 | } 17 | ) -%} 18 | {%- endfor -%} 19 | {{ return(result) }} 20 | {%- endmacro -%} 21 | 22 | {%- macro to_date_varchar_columns(column_names, format='YYYY-MM-DD') -%} 23 | {%- if column_names is string -%} 24 | {%- set column_names = [column_names] -%} 25 | {% endif -%} 26 | {%- set result = [] -%} 27 | {%- for column_name in column_names -%} 28 | {%- set sql -%} 29 | to_date(nullif(btrim({{ adapter.quote(column_name) }}), ''), '{{ format }}') 30 | {%- endset -%} 31 | {%- do result.append( 32 | { 33 | 'name': column_name 34 | ,'sql': sql 35 | } 36 | ) -%} 37 | {%- endfor -%} 38 | {{ return(result) }} 39 | {%- endmacro -%} 40 | 41 | {%- macro to_decimal_varchar_columns(column_names, decimal_separator=',', decimals=2) -%} 42 | {%- if column_names is string -%} 43 | {%- set column_names = [column_names] -%} 44 | {% endif -%} 45 | {%- set result = [] -%} 46 | {%- for column_name in column_names -%} 47 | {%- set sql -%} 48 | cast(cast(replace(nullif(btrim({{ adapter.quote(column_name) }}), ''), '{{ decimal_separator }}', '.') as double) as decimal(100, {{ decimals }})) 49 | {%- endset -%} 50 | {%- do result.append( 51 | { 52 | 'name': column_name 53 | ,'sql': sql 54 | } 55 | ) -%} 56 | {%- endfor -%} 57 | {{ return(result) }} 58 | {%- endmacro -%} 59 | 60 | {%- macro get_quoted_csv_sql_columns(sql_columns) -%} 61 | {%- for col in sql_columns -%} 62 | {{ col['sql'] }} as {{ adapter.quote(col['name']) }} 63 | {% if not loop.last -%},{%- endif -%} 64 | {%- endfor -%} 65 | {%- endmacro -%} 66 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/external_query.sql: -------------------------------------------------------------------------------- 1 | {%- macro external_query(sql) -%} 2 | {%- set source = validate_external_query() -%} 3 | {%- if source is not none -%} 4 | {%- set escaped_sql = sql | replace("'", "''") -%} 5 | {%- set result -%} 6 | select * 7 | from table({{ builtins.source(source[0], source[1]).include(schema=false, identifier=false) }}.external_query('{{ escaped_sql }}')) 8 | {%- endset -%} 9 | {%- else -%} 10 | {%- set result = sql -%} 11 | {%- 
endif -%} 12 | {{ return(result) }} 13 | {%- endmacro -%} 14 | 15 | {%- macro validate_external_query() -%} 16 | {%- set external_query = config.get('external_query', validator=validation.any[boolean]) or false -%} 17 | {%- if external_query -%} 18 | {%- if model.refs | length == 0 and model.sources | length > 0 -%} 19 | {%- set source_names = [] -%} 20 | {%- for source in model.sources -%} 21 | {%- do source_names.append(source[0]) if source[0] not in source_names -%} 22 | {% endfor %} 23 | {%- if source_names | length == 1 -%} 24 | {{ return(model.sources[0]) }} 25 | {%- else -%} 26 | {% do exceptions.raise_compiler_error("Invalid external query configuration: awaiting one single source name among all source dependencies") %} 27 | {%- endif -%} 28 | {%- else -%} 29 | {% do exceptions.raise_compiler_error("Invalid external query: awaiting only source dependencies") %} 30 | {%- endif -%} 31 | {%- else -%} 32 | {{ return(none) }} 33 | {%- endif -%} 34 | {%- endmacro -%} 35 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/format.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | input/output formats 4 | 5 | "type": String ['text', 'json', 'arrow', 'parquet', 'iceberg'] 6 | 7 | for 'text' : 8 | "fieldDelimiter": String, 9 | "lineDelimiter": String, 10 | "quote": String, 11 | "comment": String, 12 | "escape": String, 13 | "skipFirstLine": Boolean, 14 | "extractHeader": Boolean, 15 | "trimHeader": Boolean, 16 | "autoGenerateColumnNames": Boolean 17 | 18 | for 'json' : 19 | "prettyPrint" : Boolean 20 | 21 | input only formats 22 | 23 | "type": String ['delta', 'excel'] 24 | 25 | for 'excel' : 26 | "xls": Boolean 27 | "sheetName": String, 28 | "extractHeader": Boolean, 29 | "hasMergedCells": Boolean 30 | 31 | #} 32 | 33 | {% macro format_clause_from_config() -%} 34 | {%- set key_map = {'format':'type' 35 | ,'field_delimiter':'fieldDelimiter' 36 | ,'line_delimiter':'lineDelimiter' 37 | ,'skip_first_line':'skipFirstLine' 38 | ,'extract_header':'extractHeader' 39 | ,'trim_header':'trimHeader' 40 | ,'auto_generated_column_names':'autoGenerateColumnNames' 41 | ,'pretty_print':'prettyPrint'} -%} 42 | {%- set options = [] -%} 43 | {%- set format = config.get('format', validator=validation.any[basestring]) or 'iceberg' -%} 44 | {%- if format in ['text', 'json', 'arrow', 'parquet'] -%} 45 | {%- do options.append("type=>'" ~ format ~ "'") -%} 46 | {%- if format == 'text' -%} 47 | {%- for key in ['field_delimiter', 'line_delimiter', 'quote', 'comment', 'escape'] -%} 48 | {%- set value = config.get(key, validator=validation.any[basestring]) -%} 49 | {%- set key = key_map[key] or key -%} 50 | {%- if value is not none -%} 51 | {%- do options.append(key ~ "=>'" ~ value ~ "'") -%} 52 | {%- endif -%} 53 | {%- endfor -%} 54 | {%- for key in ['skip_first_line', 'extract_header', 'trim_header', 'auto_generated_column_names'] -%} 55 | {%- set value = config.get(key, validator=validation.any[boolean]) -%} 56 | {%- set key = key_map[key] or key -%} 57 | {%- if value is not none -%} 58 | {%- do options.append(key ~ "=>" ~ value) -%} 59 | {%- endif -%} 60 | {%- endfor -%} 61 | {%- elif format == 'json' -%} 62 | {%- set key = 'pretty_print' -%} 63 | {%- set value = config.get(key, validator=validation.any[boolean]) -%} 64 | {%- set key = key_map[key] or key -%} 65 | {%- if value is not none -%} 66 | {%- do options.append(key ~ "=>" ~ value) -%} 67 | {%- endif -%} 68 | {%- endif -%} 69 | {%- endif -%} 70 
| {{ return((options | join(', ')) if options | length > 0 else none) }} 71 | {%- endmacro -%} 72 | 73 | {%- macro format_clause_from_node(config) -%} 74 | {%- set key_map = {'format':'type' 75 | ,'field_delimiter':'fieldDelimiter' 76 | ,'line_delimiter':'lineDelimiter' 77 | ,'skip_first_line':'skipFirstLine' 78 | ,'extract_header':'extractHeader' 79 | ,'trim_header':'trimHeader' 80 | ,'auto_generated_column_names':'autoGenerateColumnNames' 81 | ,'pretty_print':'prettyPrint' 82 | ,'sheet_name':'sheetName' 83 | ,'has_merged_cells':'hasMergedCells'} -%} 84 | {%- set options = [] -%} 85 | {%- set format = config['format'] -%} 86 | {%- if format is defined and format is string and format in ['text', 'json', 'arrow', 'parquet', 'avro', 'excel', 'delta'] -%} 87 | {%- do options.append("type=>'" ~ format ~ "'") -%} 88 | {%- endif -%} 89 | {%- if format == 'text' -%} 90 | {%- for key in ['field_delimiter', 'line_delimiter', 'quote', 'comment', 'escape'] -%} 91 | {%- set value = config[key] -%} 92 | {%- if value is defined and value is string -%} 93 | {%- set key = key_map[key] or key -%} 94 | {%- do options.append(key ~ "=>'" ~ value ~ "'") -%} 95 | {%- endif -%} 96 | {%- endfor -%} 97 | {%- for key in ['skip_first_line', 'extract_header', 'trim_header', 'auto_generated_column_names'] -%} 98 | {%- set value = config[key] -%} 99 | {%- if value is defined and value is boolean -%} 100 | {%- set key = key_map[key] or key -%} 101 | {%- do options.append(key ~ "=>" ~ value) -%} 102 | {%- endif -%} 103 | {%- endfor -%} 104 | {%- elif format == 'json' -%} 105 | {%- set key = 'pretty_print' -%} 106 | {%- set value = config[key] -%} 107 | {%- if value is defined and value is boolean -%} 108 | {%- set key = key_map[key] or key -%} 109 | {%- do options.append(key ~ "=>" ~ value) -%} 110 | {%- endif -%} 111 | {%- elif format == 'excel' -%} 112 | {%- for key in ['sheet_name'] -%} 113 | {%- set value = config[key] -%} 114 | {%- if value is defined and value is string -%} 115 | {%- set key = key_map[key] or key -%} 116 | {%- do options.append(key ~ "=>'" ~ value ~ "'") -%} 117 | {%- endif -%} 118 | {%- endfor -%} 119 | {%- for key in ['xls', 'extract_header', 'has_merged_cells'] -%} 120 | {%- set value = config[key] -%} 121 | {%- if value is defined and value is boolean -%} 122 | {%- set key = key_map[key] or key -%} 123 | {%- do options.append(key ~ "=>" ~ value) -%} 124 | {%- endif -%} 125 | {%- endfor -%} 126 | {%- endif -%} 127 | {{ return((options | join(', ')) if options | length > 0 else none) }} 128 | {%- endmacro -%} 129 | 130 | {% macro render_with_format_clause(target_table) %} 131 | {%- set options = format_clause_from_config() -%} 132 | {% if options is not none -%} 133 | table( 134 | {%- endif %} 135 | {{ target_table }} 136 | {%- if options is not none -%} 137 | ( {{ options }} )) 138 | {%- endif -%} 139 | {% endmacro %} 140 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/metadata.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__get_catalog(information_schema, schemas) -%} 2 | {%- set database = information_schema.database.strip('"') -%} 3 | {%- set table_schemas = [] -%} 4 | {%- for schema in schemas -%} 5 | {%- set schema = schema.strip('"') -%} 6 | {%- do table_schemas.append( 7 | "'" + database + (('.' 
+ schema) if schema != 'no_schema' else '') + "'" 8 | ) -%} 9 | {%- endfor -%} 10 | {%- call statement('catalog', fetch_result=True) -%} 11 | 12 | with cte as ( 13 | 14 | {%- if var('dremio:reflections_enabled', true) %} 15 | 16 | select 17 | case when position('.' in table_schema) > 0 18 | then substring(table_schema, 1, position('.' in table_schema) - 1) 19 | else table_schema 20 | end as table_database 21 | ,case when position('.' in table_schema) > 0 22 | then substring(table_schema, position('.' in table_schema) + 1) 23 | else 'no_schema' 24 | end as table_schema 25 | ,reflection_name as table_name 26 | ,'materializedview' as table_type 27 | ,case 28 | when nullif(external_reflection, '') is not null then 'target: ' || external_reflection 29 | when arrow_cache then 'arrow cache' 30 | end as table_comment 31 | ,column_name 32 | ,ordinal_position as column_index 33 | ,lower(data_type) as column_type 34 | ,concat( 35 | case when strpos(regexp_replace(display_columns, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'display' end 36 | ,', ',case when strpos(regexp_replace(dimensions, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'dimension' end 37 | ,', ',case when strpos(regexp_replace(measures, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'measure' end 38 | ,', ',case when strpos(regexp_replace(sort_columns, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'sort' end 39 | ,', ',case when strpos(regexp_replace(partition_columns, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'partition' end 40 | ,', ',case when strpos(regexp_replace(distribution_columns, '$|, |^', '/'), '/' || column_name || '/') > 0 then 'distribution' end 41 | ) as column_comment 42 | ,cast(null as varchar) as table_owner 43 | from sys.reflections 44 | join information_schema.columns 45 | on (columns.table_schema || '.' || columns.table_name = replace(dataset_name, '"', '') 46 | and (strpos(regexp_replace(display_columns, '$|, |^', '/'), '/' || column_name || '/') > 0 47 | or strpos(regexp_replace(dimensions, '$|, |^', '/'), '/' || column_name || '/') > 0 48 | or strpos(regexp_replace(measures, '$|, |^', '/'), '/' || column_name || '/') > 0 )) 49 | where 50 | {% for table_schema in table_schemas -%} 51 | ilike( table_schema, {{ table_schema.strip('"') }}){%- if not loop.last %} or {% endif -%} 52 | {%- endfor %} 53 | 54 | union all 55 | 56 | {%- endif %} 57 | 58 | select (case when position('.' in columns.table_schema) > 0 59 | then substring(columns.table_schema, 1, position('.' in columns.table_schema) - 1) 60 | else columns.table_schema 61 | end) as table_database 62 | ,(case when position('.' in columns.table_schema) > 0 63 | then substring(columns.table_schema, position('.' 
in columns.table_schema) + 1) 64 | else 'no_schema' 65 | end) as table_schema 66 | ,columns.table_name 67 | ,lower(t.table_type) as table_type 68 | ,cast(null as varchar) as table_comment 69 | ,column_name 70 | ,ordinal_position as column_index 71 | ,lower(data_type) as column_type 72 | ,cast(null as varchar) as column_comment 73 | ,cast(null as varchar) as table_owner 74 | from information_schema."tables" as t 75 | join information_schema.columns 76 | on (t.table_schema = columns.table_schema 77 | and t.table_name = columns.table_name) 78 | where t.table_type <> 'SYSTEM_TABLE' 79 | and ( 80 | {%- for table_schema in table_schemas -%} 81 | ilike( t.table_schema, {{ table_schema.strip('"') }}){%- if not loop.last %} or {% endif -%} 82 | {%- endfor -%} 83 | ) 84 | ) 85 | 86 | select * 87 | from cte 88 | order by table_schema 89 | ,table_name 90 | ,column_index 91 | {%- endcall -%} 92 | {{ return(load_result('catalog').table) }} 93 | {%- endmacro %} 94 | 95 | {% macro dremio__information_schema_name(database) -%} 96 | information_schema 97 | {%- endmacro %} 98 | 99 | {% macro dremio__list_schemas(database) -%} 100 | {%- set schema_name_like = database.strip('"') + '.%' -%} 101 | {% set sql %} 102 | select substring(schema_name, position('.' in schema_name) + 1) 103 | from information_schema.schemata 104 | where ilike(schema_name, '{{ schema_name_like }}') 105 | union 106 | values('no_schema') 107 | {% endset %} 108 | {{ return(run_query(sql)) }} 109 | {% endmacro %} 110 | 111 | {% macro dremio__check_schema_exists(information_schema, schema) -%} 112 | {%- set schema_name = information_schema.database.strip('"') 113 | + (('.' + schema) if schema != 'no_schema' else '') -%} 114 | {% set sql -%} 115 | select count(*) 116 | from information_schema.schemata 117 | where ilike(schema_name, '{{ schema_name }}') 118 | {%- endset %} 119 | {{ return(run_query(sql)) }} 120 | {% endmacro %} 121 | 122 | {% macro dremio__list_relations_without_caching(schema_relation) %} 123 | {%- set database = schema_relation.database.strip('"') -%} 124 | {%- set schema = schema_relation.schema.strip('"') -%} 125 | {%- set schema_name = database 126 | + (('.' + schema) if schema != 'no_schema' else '') -%} 127 | {% call statement('list_relations_without_caching', fetch_result=True) -%} 128 | 129 | {%- if var('dremio:reflections_enabled', true) -%} 130 | 131 | with cte1 as ( 132 | select 133 | dataset_name 134 | ,reflection_name 135 | ,type 136 | ,case when substr(dataset_name, 1, 1) = '"' 137 | then strpos(dataset_name, '".') + 1 138 | else strpos(dataset_name, '.') 139 | end as first_dot 140 | ,length(dataset_name) - 141 | case when substr(dataset_name, length(dataset_name)) = '"' 142 | then strpos(reverse(dataset_name), '".') 143 | else strpos(reverse(dataset_name), '.') - 1 144 | end as last_dot 145 | ,length(dataset_name) as length 146 | from sys.reflections 147 | ) 148 | , cte2 as ( 149 | select 150 | replace(substr(dataset_name, 1, first_dot - 1), '"', '') as table_catalog 151 | ,reflection_name as table_name 152 | ,replace(case when first_dot < last_dot 153 | then substr(dataset_name, first_dot + 1, last_dot - first_dot - 1) 154 | else 'no_schema' end, '"', '') as table_schema 155 | ,'materializedview' as table_type 156 | from cte1 157 | ) 158 | select table_catalog, table_name, table_schema, table_type 159 | from cte2 160 | where ilike(table_catalog, '{{ database }}') 161 | and ilike(table_schema, '{{ schema }}') 162 | 163 | union all 164 | 165 | {%- endif %} 166 | 167 | select (case when position('.' 
in table_schema) > 0 168 | then substring(table_schema, 1, position('.' in table_schema) - 1) 169 | else table_schema 170 | end) as table_catalog 171 | ,table_name 172 | ,(case when position('.' in table_schema) > 0 173 | then substring(table_schema, position('.' in table_schema) + 1) 174 | else 'no_schema' 175 | end) as table_schema 176 | ,lower(table_type) as table_type 177 | from information_schema."tables" 178 | where ilike(table_schema, '{{ schema_name }}') 179 | and table_type <> 'system_table' 180 | 181 | {% endcall %} 182 | {% set t = load_result('list_relations_without_caching').table %} 183 | {{ return(t) }} 184 | {% endmacro %} 185 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/relation.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__make_temp_relation(base_relation, suffix) %} 2 | {% set tmp_identifier = base_relation.identifier ~ suffix %} 3 | {% set tmp_relation = base_relation.incorporate( 4 | path={"identifier": tmp_identifier}) -%} 5 | {% do return(tmp_relation) %} 6 | {% endmacro %} 7 | 8 | {% macro dremio__drop_relation(relation) -%} 9 | {% call statement('drop_relation', auto_begin=False) -%} 10 | drop {{ relation.type }} if exists {{ relation }} 11 | {%- endcall %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/schema.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__create_schema(relation) -%} 2 | {{ log('create_schema macro (' + relation.render() + ') not implemented yet for adapter ' + adapter.type(), info=True) }} 3 | {% endmacro %} 4 | 5 | {% macro dremio__drop_schema(relation) -%} 6 | {{ exceptions.raise_not_implemented( 7 | 'drop_schema macro not implemented for adapter '+adapter.type()) }} 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/adapters/sources.sql: -------------------------------------------------------------------------------- 1 | {%- macro get_quoted_csv_source_columns(source_name, table_name) -%} 2 | {%- if execute -%} 3 | {%- set source = graph.sources.values() | selectattr("source_name", "equalto", source_name) | selectattr("name", "equalto", table_name) | list | first -%} 4 | {# Got a hard time finding the following line !!! 
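       The loop below walks source.columns and builds a cleaned-up, quoted select
       list: columns listed in the source's external.date_columns are wrapped in
       to_date(...) using external.date_format (default 'YYYY-MM-DD'), columns in
       external.decimal_columns are cast to decimal using external.decimal_separator
       (default ',') and external.decimals (default 2), and every other column is
       trimmed with nullif(btrim(...), '').

       Illustrative source entry only - names and values are invented, the keys are
       the ones read by this macro and by format_clause_from_node:

         sources:
           - name: landing
             tables:
               - name: raw_orders
                 external:
                   format: text
                   extract_header: true
                   date_columns: [order_date]
                   date_format: 'DD.MM.YYYY'
                   decimal_columns: [amount]
                   decimal_separator: ','
                   decimals: 2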
#} 5 | {%- for col_name, column in source.columns.items() -%} 6 | {%- if column.name in source.external.date_columns -%} 7 | {%- set date_format = source.external.date_format if source.external.date_format is defined else 'YYYY-MM-DD' -%} 8 | to_date(nullif(btrim({{ adapter.quote(column.name) }}), ''), '{{ date_format }}') 9 | {%- elif column.name in source.external.decimal_columns -%} 10 | {%- set decimal_separator = source.external.decimal_separator if source.external.decimal_separator is defined else ',' -%} 11 | {%- set decimals = source.external.decimals if source.external.decimals is defined else 2 -%} 12 | cast(cast(replace(nullif(btrim({{ adapter.quote(column.name) }}), ''), '{{ decimal_separator }}', '.') as double) as decimal(100, {{ decimals }})) 13 | {%- else -%} 14 | nullif(btrim({{ adapter.quote(column.name) }}), '') 15 | {%- endif -%} as {{ adapter.quote(column.name) }} 16 | {%- if not loop.last %} 17 | ,{%- endif -%} 18 | {%- endfor -%} 19 | {%- endif -%} 20 | {%- endmacro -%} 21 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/builtins/builtins.sql: -------------------------------------------------------------------------------- 1 | {%- macro ref(model_name) -%} 2 | {%- set relation = builtins.ref(model_name) -%} 3 | {%- if execute -%} 4 | {%- set model = graph.nodes.values() | selectattr("name", "equalto", model_name) | list | first -%} 5 | {%- if model.config.materialized == 'reflection' -%} 6 | {% do exceptions.raise_compiler_error("Reflections cannot be ref()erenced (" ~ relation ~ ")") %} 7 | {%- endif -%} 8 | {%- set format = model.config.format if 9 | model.config.materialized not in ['view', 'reflection'] 10 | and model.config.format is defined 11 | else none -%} 12 | {%- set format_clause = format_clause_from_node(model.config) if format is not none else none -%} 13 | {%- set relation2 = api.Relation.create(database=relation.database, schema=relation.schema, identifier=relation.identifier, format=format, format_clause=format_clause) -%} 14 | {{ return (relation2) }} 15 | {%- else -%} 16 | {{ return (relation) }} 17 | {%- endif -%} 18 | {%- endmacro -%} 19 | 20 | {%- macro source(source_name, table_name) -%} 21 | {%- set relation = builtins.source(source_name, table_name) -%} 22 | {%- if execute -%} 23 | {%- set source = graph.sources.values() | selectattr("source_name", "equalto", source_name) | selectattr("name", "equalto", table_name) | list | first -%} 24 | {%- set format = source.external.format if 25 | source.external is defined 26 | and source.external.format is defined 27 | else none -%} 28 | {%- set format_clause = format_clause_from_node(source.external) if format is not none else none -%} 29 | {%- set relation2 = api.Relation.create(database=relation.database, schema=relation.schema, identifier=relation.identifier, format=format, format_clause=format_clause) -%} 30 | {{ return (relation2) }} 31 | {%- else -%} 32 | {{ return (relation) }} 33 | {%- endif -%} 34 | {%- endmacro -%} 35 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/get_custom_name/get_custom_alias.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__generate_alias_name(custom_alias_name=none, node=none) -%} 2 | {%- set custom_alias_name = custom_alias_name if not is_datalake_node(node) 3 | else node.config.file -%} 4 | {{ generate_alias_name_impl(node.name, custom_alias_name, node) }} 5 | {%- endmacro %} 6 | 7 | {% 
macro generate_alias_name_impl(default_alias, custom_alias_name=none, node=none) -%} 8 | {%- if custom_alias_name is none -%} 9 | 10 | {{ default_alias }} 11 | 12 | {%- else -%} 13 | 14 | {{ custom_alias_name | trim }} 15 | 16 | {%- endif -%} 17 | {%- endmacro %} 18 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/get_custom_name/get_custom_database.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__generate_database_name(custom_database_name=none, node=none) -%} 2 | {%- set default_database = target.database if not is_datalake_node(node) 3 | else target.datalake -%} 4 | {%- set custom_database_name = custom_database_name if not is_datalake_node(node) 5 | else node.config.datalake -%} 6 | {{ generate_database_name_impl(default_database, custom_database_name, node) }} 7 | {%- endmacro %} 8 | 9 | {% macro generate_database_name_impl(default_database, custom_database_name=none, node=none) -%} 10 | {%- if custom_database_name is none -%} 11 | 12 | {{ default_database }} 13 | 14 | {%- else -%} 15 | 16 | {{ custom_database_name }} 17 | 18 | {%- endif -%} 19 | {%- endmacro %} 20 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/get_custom_name/get_custom_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__generate_schema_name(custom_schema_name, node) -%} 2 | {%- set default_schema = target.schema if not is_datalake_node(node) 3 | else target.root_path -%} 4 | {%- set custom_schema_name = custom_schema_name if not is_datalake_node(node) 5 | else node.config.root_path -%} 6 | {{ generate_schema_name_impl(default_schema, custom_schema_name, node) }} 7 | {%- endmacro %} 8 | 9 | {% macro generate_schema_name_impl(default_schema, custom_schema_name=none, node=none) -%} 10 | {%- if custom_schema_name is none -%} 11 | 12 | {{ default_schema }} 13 | 14 | {%- else -%} 15 | 16 | {{ custom_schema_name }} 17 | 18 | {%- endif -%} 19 | {%- endmacro %} 20 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/get_custom_name/is_datalake_node.sql: -------------------------------------------------------------------------------- 1 | {% macro is_datalake_node(node) -%} 2 | {{ return(node.resource_type in ['test', 'seed'] 3 | or (node.resource_type == 'model' and node.config.materialized not in ['view', 'reflection'])) }} 4 | {%- endmacro %} 5 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/helpers.sql: -------------------------------------------------------------------------------- 1 | {% macro config_cols(label, default_cols=none) %} 2 | {%- set cols = config.get(label | replace(" ", "_"), validator=validation.any[list, basestring]) or default_cols -%} 3 | {%- if cols is not none %} 4 | {%- if cols is string -%} 5 | {%- set cols = [cols] -%} 6 | {%- endif -%} 7 | {{ label }} ( 8 | {%- for item in cols -%} 9 | {{ adapter.quote(item) }} 10 | {%- if not loop.last -%},{%- endif -%} 11 | {%- endfor -%} 12 | ) 13 | {%- endif %} 14 | {%- endmacro -%} 15 | 16 | {% macro partition_method() %} 17 | {%- set method = config.get('partition_method', validator=validation.any[basestring]) -%} 18 | {%- if method is not none -%} 19 | {{ method }} 20 | {%- endif %} 21 | {%- endmacro -%} 22 | 23 | {%- macro join_using(left_table, right_table, left_columns, 
right_columns=none) -%} 24 | {%- for column_name in left_columns -%} 25 | {{ left_table }}.{{ column_name }} = {{ right_table }}.{{ right_columns[loop.index0] if right_columns else column_name }} 26 | {% if not loop.last %} and {% endif -%} 27 | {%- endfor -%} 28 | {%- endmacro -%} 29 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/incremental/incremental.sql: -------------------------------------------------------------------------------- 1 | {% materialization incremental, adapter='dremio' -%} 2 | 3 | {%- set identifier = model['alias'] -%} 4 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 5 | {%- set target_relation = api.Relation.create(identifier=identifier, 6 | schema=schema, 7 | database=database, 8 | type='table') -%} 9 | 10 | {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} 11 | {%- set tmp_relation = adapter.get_relation(database=database, schema=schema, identifier=tmp_identifier) -%} 12 | {%- set target_tmp_relation = api.Relation.create(identifier=tmp_identifier, 13 | schema=schema, 14 | database=database, 15 | type='table') -%} 16 | 17 | {%- set raw_strategy = config.get('incremental_strategy', validator=validation.any[basestring]) or 'append' -%} 18 | {%- set raw_file_format = config.get('format', validator=validation.any[basestring]) or 'iceberg' -%} 19 | {%- set file_format = dbt_dremio_validate_get_file_format(raw_file_format) -%} 20 | {%- set strategy = dbt_dremio_validate_get_incremental_strategy(raw_strategy, file_format) -%} 21 | {%- set unique_key = config.get('unique_key', validator=validation.any[list, basestring]) -%} 22 | {%- set full_refresh_mode = (should_full_refresh()) -%} 23 | {%- set raw_on_schema_change = config.get('on_schema_change', validator=validation.any[basestring]) or 'ignore' -%} 24 | {%- set on_schema_change = incremental_validate_on_schema_change(raw_on_schema_change) -%} 25 | {%- set full_refresh_mode = (should_full_refresh()) -%} 26 | 27 | {{ run_hooks(pre_hooks) }} 28 | 29 | {% if old_relation is none %} 30 | {% set build_sql = create_table_as(False, target_relation, external_query(sql)) %} 31 | {% elif full_refresh_mode %} 32 | {% do adapter.drop_relation(old_relation) %} 33 | {% set build_sql = create_table_as(False, target_relation, external_query(sql)) %} 34 | {% else %} 35 | {% if tmp_relation is not none %} 36 | {{ adapter.drop_relation(tmp_relation) }} 37 | {% endif %} 38 | {{ run_query(create_table_as(True, target_tmp_relation, external_query(sql))) }} 39 | {{ process_schema_changes(on_schema_change, target_tmp_relation, old_relation) }} 40 | {% set build_sql = dbt_dremio_get_incremental_sql(strategy, target_tmp_relation, target_relation, unique_key) %} 41 | {% endif %} 42 | 43 | {%- call statement('main') -%} 44 | {{ build_sql }} 45 | {%- endcall -%} 46 | 47 | {% if not(old_relation is none or full_refresh_mode) %} 48 | {{ adapter.drop_relation(target_tmp_relation) }} 49 | {% endif %} 50 | 51 | {{ refresh_metadata(target_relation, raw_file_format) }} 52 | 53 | {{ apply_twin_strategy(target_relation) }} 54 | 55 | {% do persist_docs(target_relation, model) %} 56 | 57 | {{ run_hooks(post_hooks) }} 58 | 59 | {{ return({'relations': [target_relation]})}} 60 | 61 | {%- endmaterialization %} 62 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/incremental/strategies.sql: 
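{#
  Illustrative usage only (not a file in this adapter): a model handled by the
  incremental materialization above and the append strategy below might be
  configured as in the sketch that follows. Per validate.sql only the 'append'
  strategy and the 'iceberg' format are accepted; model and column names here
  are invented.

  {{ config(
      materialized='incremental',
      incremental_strategy='append',
      format='iceberg',
      on_schema_change='ignore'
  ) }}

  select * from {{ ref('stg_events') }}
  {% if is_incremental() %}
  where event_ts > (select max(event_ts) from {{ this }})
  {% endif %}
#}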
-------------------------------------------------------------------------------- 1 | {% macro get_insert_into_sql(source_relation, target_relation) %} 2 | 3 | {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} 4 | {%- set src_columns = adapter.get_columns_in_relation(source_relation) -%} 5 | {%- set intersection = intersect_columns(src_columns, dest_columns) -%} 6 | {%- set dest_cols_csv = intersection | map(attribute='quoted') | join(', ') -%} 7 | insert into {{ target_relation }}( {{dest_cols_csv}} ) 8 | select {{dest_cols_csv}} from {{ source_relation }} 9 | 10 | {% endmacro %} 11 | 12 | {% macro dbt_dremio_get_incremental_sql(strategy, source, target, unique_key) %} 13 | {%- if strategy == 'append' -%} 14 | {#-- insert new records into existing table, without updating or overwriting #} 15 | {{ get_insert_into_sql(source, target) }} 16 | {%- else -%} 17 | {% set no_sql_for_strategy_msg -%} 18 | No known SQL for the incremental strategy provided: {{ strategy }} 19 | {%- endset %} 20 | {%- do exceptions.raise_compiler_error(no_sql_for_strategy_msg) -%} 21 | {%- endif -%} 22 | 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/incremental/validate.sql: -------------------------------------------------------------------------------- 1 | {% macro dbt_dremio_validate_get_file_format(raw_file_format) %} 2 | {#-- Validate the file format #} 3 | 4 | {% set accepted_formats = ['iceberg'] %} 5 | 6 | {% set invalid_file_format_msg -%} 7 | Invalid file format provided: {{ raw_file_format }} 8 | Expected one of: {{ accepted_formats | join(', ') }} 9 | {%- endset %} 10 | 11 | {% if raw_file_format not in accepted_formats %} 12 | {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} 13 | {% endif %} 14 | 15 | {% do return(raw_file_format) %} 16 | {% endmacro %} 17 | 18 | {% macro dbt_dremio_validate_get_incremental_strategy(raw_strategy, file_format) %} 19 | {#-- Validate the incremental strategy #} 20 | 21 | {% set invalid_strategy_msg -%} 22 | Invalid incremental strategy provided: {{ raw_strategy }} 23 | Expected one of: 'append' 24 | {%- endset %} 25 | 26 | {% if raw_strategy not in ['append'] %} 27 | {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} 28 | {% endif %} 29 | 30 | {% do return(raw_strategy) %} 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/reflection/create_reflection.sql: -------------------------------------------------------------------------------- 1 | {# 2 | ALTER TABLE tblname 3 | ADD RAW REFLECTION name 4 | USING 5 | DISPLAY (field1, field2) 6 | [ DISTRIBUTE BY (field1, field2, ..) ] 7 | [ (STRIPED, CONSOLIDATED) PARTITION BY (field1, field2, ..) ] 8 | [ LOCALSORT BY (field1, field2, ..) ] 9 | [ ARROW CACHE ] 10 | 11 | ALTER TABLE tblname 12 | ADD AGGREGATE REFLECTION name 13 | USING 14 | DIMENSIONS (field1, field2) 15 | MEASURES (field1, field2) 16 | [ DISTRIBUTE BY (field1, field2, ..) ] 17 | [ (STRIPED, CONSOLIDATED) PARTITION BY (field1, field2, ..) ] 18 | [ LOCALSORT BY (field1, field2, ..) 
] 19 | [ ARROW CACHE ] 20 | 21 | ALTER TABLE tblname 22 | ADD EXTERNAL REFLECTION name 23 | USING target 24 | #} 25 | 26 | {%- macro create_reflection(reflection_type, anchor, reflection, external_target=none, 27 | display=none, dimensions=none, by_day_dimensions=none, measures=none) %} 28 | alter dataset {{ anchor }} 29 | create {{ reflection_type }} reflection {{ reflection.include(database=False, schema=False) }} 30 | using 31 | {%- if reflection_type == 'raw' %} 32 | {{ display_clause(display) }} 33 | {%- elif reflection_type == 'aggregate' %} 34 | {{ dimensions_clause(dimensions=dimensions, by_day_dimensions=by_day_dimensions) }} 35 | {{ measures_clause(measures) }} 36 | {%- else -%} 37 | {{ external_target }} 38 | {% endif -%} 39 | {%- if reflection_type in ['raw', 'aggregate'] %} 40 | {{ partition_method() }} {{ config_cols("partition by") }} 41 | {{ config_cols("localsort by") }} 42 | {{ config_cols("distribute by") }} 43 | {{ arrow_cache_clause() }} 44 | {%- endif -%} 45 | {% endmacro -%} 46 | 47 | {%- macro display_clause(display=none) %} 48 | {%- set cols = config.get('display', validator=validation.any[list, basestring]) or display -%} 49 | {%- if cols is not none %} 50 | {%- if cols is string -%} 51 | {%- set cols = [cols] -%} 52 | {%- endif -%} 53 | display ( 54 | {%- for item in cols -%} 55 | {{ adapter.quote(item) }} 56 | {%- if not loop.last -%},{%- endif -%} 57 | {%- endfor -%} 58 | ) 59 | {%- endif %} 60 | {% endmacro -%} 61 | 62 | {%- macro dimensions_clause(dimensions=none, by_day_dimensions=none) %} 63 | {%- set cols = config.get('dimensions', validator=validation.any[list, basestring]) or dimensions -%} 64 | {%- set by_day_cols = config.get('by_day_dimensions', validator=validation.any[list, basestring]) or by_day_dimensions -%} 65 | {%- if cols is not none %} 66 | {%- if cols is string -%} 67 | {%- set cols = [cols] -%} 68 | {%- endif -%} 69 | {%- if by_day_cols is string -%} 70 | {%- set by_day_cols = [by_day_cols] -%} 71 | {%- endif -%} 72 | dimensions ( 73 | {%- for item in cols -%} 74 | {{ adapter.quote(item) ~ (' by day' if item in by_day_cols else "") }} 75 | {%- if not loop.last -%},{%- endif -%} 76 | {%- endfor -%} 77 | ) 78 | {%- endif %} 79 | {% endmacro -%} 80 | 81 | {%- macro measures_clause(measures=none) %} 82 | {%- set cols = config.get('measures', validator=validation.any[list, basestring]) or measures -%} 83 | {%- set comp_cols = config.get('computations', validator=validation.any[list, basestring]) or [] -%} 84 | {%- if cols is not none %} 85 | {%- if cols is string -%} 86 | {%- set cols = [cols] -%} 87 | {%- endif -%} 88 | {%- if comp_cols is string -%} 89 | {%- set comp_cols = [comp_cols] -%} 90 | {%- endif -%} 91 | measures ( 92 | {%- for item in cols -%} 93 | {%- set computations = (' (' ~ comp_cols[loop.index0] ~ ')') 94 | if loop.index0 < comp_cols | length and comp_cols[loop.index0] is not none else '' -%} 95 | {{ adapter.quote(item) ~ computations }} 96 | {%- if not loop.last -%},{%- endif -%} 97 | {%- endfor -%} 98 | ) 99 | {%- endif %} 100 | {% endmacro -%} 101 | 102 | {%- macro arrow_cache_clause() -%} 103 | {%- set arrow_cache = config.get('arrow_cache', validator=validation.any[boolean]) -%} 104 | {%- if arrow_cache is not none and arrow_cache -%} 105 | arrow cache 106 | {%- endif -%} 107 | {% endmacro -%} 108 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/reflection/helpers.sql: 
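{#
  Illustrative usage only (not a file in this adapter): an aggregate reflection
  model handled by the create_reflection macro above and the reflection
  materialization defined later in this directory might look like the sketch
  below. The model body only needs to ref() or source() the anchor dataset;
  names, column lists and the computations entry are invented.

  {{ config(
      materialized='reflection',
      reflection_type='aggregate',
      dimensions=['customer_id', 'order_date'],
      by_day_dimensions=['order_date'],
      measures=['amount'],
      computations=['SUM, COUNT'],
      arrow_cache=true
  ) }}

  select * from {{ ref('fct_orders') }}
#}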
-------------------------------------------------------------------------------- 1 | {% macro drop_reflection_if_exists(relation, reflection) %} 2 | {% if reflection is not none and reflection.type == 'materializedview' %} 3 | {% call statement('drop reflection') -%} 4 | alter dataset {{ relation }} 5 | drop reflection {{ reflection.include(database=False, schema=False) }} 6 | {%- endcall %} 7 | {% endif %} 8 | {% endmacro %} 9 | 10 | {% macro dbt_dremio_validate_get_reflection_type(raw_reflection_type) %} 11 | {% set accepted_types = ['raw', 'aggregate', 'external'] %} 12 | {% set invalid_reflection_type_msg -%} 13 | Invalid reflection type provided: {{ raw_reflection_type }} 14 | Expected one of: {{ accepted_types | join(', ') }} 15 | {%- endset %} 16 | {% if raw_reflection_type not in accepted_types %} 17 | {% do exceptions.raise_compiler_error(invalid_reflection_type_msg) %} 18 | {% endif %} 19 | {% do return(raw_reflection_type) %} 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/reflection/reflection.sql: -------------------------------------------------------------------------------- 1 | {% materialization reflection, adapter='dremio' %} 2 | {%- if not var('dremio:reflections_enabled', true) -%} 3 | {% do exceptions.raise_compiler_error("reflections are disabled, set 'dremio:reflections_enabled' variable to true to enable them") %} 4 | {%- endif -%} 5 | 6 | {% set raw_reflection_type = config.get('reflection_type', validator=validation.any[basestring]) or 'raw' %} 7 | {% set raw_anchor = config.get('anchor', validator=validation.any[list, basestring]) %} 8 | {% set raw_external_target = config.get('external_target', validator=validation.any[list, basestring]) %} 9 | {% set identifier = model['alias'] %} 10 | {%- set display = config.get('display', validator=validation.any[list, basestring]) -%} 11 | {%- set dimensions = config.get('dimensions', validator=validation.any[list, basestring]) -%} 12 | {%- set measures = config.get('measures', validator=validation.any[list, basestring]) -%} 13 | 14 | {% if model.refs | length + model.sources | length == 1 %} 15 | {% if model.refs | length == 1 %} 16 | {% set anchor = ref(model.refs[0][0]) %} 17 | {% else %} 18 | {% set anchor = source(model.sources[0][0], model.sources[0][1]) %} 19 | {% endif %} 20 | {% elif model.refs | length + model.sources | length > 1 %} 21 | {% if raw_anchor is not none %} 22 | {% if raw_anchor is string %} 23 | {% set raw_anchor = [raw_anchor] %} 24 | {% endif %} 25 | {% if raw_anchor | length == 1 %} 26 | {% set anchor = ref(raw_anchor[0]) %} 27 | {% elif raw_anchor | length == 2 %} 28 | {% set anchor = source(raw_anchor[0], raw_anchor[1]) %} 29 | {% endif %} 30 | {% endif %} 31 | {% if raw_external_target is not none %} 32 | {% if raw_external_target is string %} 33 | {% set raw_external_target = [raw_external_target] %} 34 | {% endif %} 35 | {% if raw_external_target | length == 1 %} 36 | {% set external_target = ref(raw_external_target[0]) %} 37 | {% elif raw_external_target | length == 2 %} 38 | {% set external_target = source(raw_external_target[0], raw_external_target[1]) %} 39 | {% endif %} 40 | {% endif %} 41 | {% endif %} 42 | 43 | {%- set old_relation = adapter.get_relation(database=anchor.database, schema=anchor.schema, identifier=identifier) -%} 44 | {%- set target_relation = api.Relation.create( 45 | identifier=identifier, schema=anchor.schema, database=anchor.database, type='materializedview') -%} 46 
| 47 | {%- set reflection_type = dbt_dremio_validate_get_reflection_type(raw_reflection_type) -%} 48 | {% if (reflection_type == 'raw' and display is none) 49 | or (reflection_type == 'aggregate' and (dimensions is none or measures is none)) %} 50 | {% set columns = adapter.get_columns_in_relation(anchor) %} 51 | {% if reflection_type == 'raw' %} 52 | {% set display = columns | map(attribute='name') | list %} 53 | {% elif reflection_type == 'aggregate' %} 54 | {% if dimensions is none %} 55 | {% set dimensions = columns | rejectattr('dtype', 'in', ['decimal', 'float', 'double']) | map(attribute='name') | list %} 56 | {% set by_day_dimensions = columns | selectattr('dtype', 'in', ['timestamp']) | map(attribute='name') | list %} 57 | {% endif %} 58 | {% if measures is none %} 59 | {% set measures = columns | selectattr('dtype', 'in', ['decimal', 'float', 'double']) | map(attribute='name') | list %} 60 | {% endif %} 61 | {% endif %} 62 | {% endif %} 63 | 64 | {{ run_hooks(pre_hooks) }} 65 | 66 | {{ drop_reflection_if_exists(anchor, old_relation) }} 67 | -- build model 68 | {% call statement('main') -%} 69 | {{ create_reflection(reflection_type, anchor, target_relation, external_target, 70 | display=display, dimensions=dimensions, by_day_dimensions=by_day_dimensions, measures=measures) }} 71 | {%- endcall %} 72 | 73 | {{ run_hooks(post_hooks) }} 74 | 75 | {{ return({'relations': [target_relation]}) }} 76 | {% endmaterialization %} 77 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/seed/helpers.sql: -------------------------------------------------------------------------------- 1 | {% macro select_csv_rows(model, agate_table) %} 2 | {%- set column_override = model['config'].get('column_types', {}) -%} 3 | {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} 4 | {%- set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) -%} 5 | select 6 | {% for col_name in agate_table.column_names -%} 7 | {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%} 8 | {%- set type = column_override.get(col_name, inferred_type) -%} 9 | {%- set column_name = (col_name | string) -%} 10 | cast({{ adapter.quote_seed_column(column_name, quote_seed_column) }} as {{ type }}) 11 | as {{ adapter.quote_seed_column(column_name, quote_seed_column) }}{%- if not loop.last -%}, {%- endif -%} 12 | {% endfor %} 13 | from 14 | (values 15 | {% for row in agate_table.rows %} 16 | ({%- for value in row -%} 17 | {% if value is not none %} 18 | {{ "'" ~ (value | string | replace("'", "''")) ~ "'" }} 19 | {% else %} 20 | cast(null as varchar) 21 | {% endif %} 22 | {%- if not loop.last%},{%- endif %} 23 | {%- endfor -%}) 24 | {%- if not loop.last%},{%- endif %} 25 | {% endfor %}) temp_table ( {{ cols_sql }} ) 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/seed/seed.sql: -------------------------------------------------------------------------------- 1 | {% materialization seed, adapter = 'dremio' %} 2 | 3 | {%- set identifier = model['alias'] -%} 4 | {%- set format = config.get('format', validator=validation.any[basestring]) or 'iceberg' -%} 5 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 6 | {%- set target_relation = api.Relation.create(identifier=identifier, 7 | schema=schema, 8 | database=database, 9 | type='table') -%} 10 | 11 | {{ 
run_hooks(pre_hooks) }} 12 | 13 | -- setup: if the target relation already exists, drop it 14 | -- in case if the existing and future table is delta, we want to do a 15 | -- create or replace table instead of dropping, so we don't have the table unavailable 16 | {% if old_relation is not none -%} 17 | {{ adapter.drop_relation(old_relation) }} 18 | {%- endif %} 19 | 20 | {%- set agate_table = load_agate_table() -%} 21 | {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%} 22 | {%- set num_rows = (agate_table.rows | length) -%} 23 | {%- set sql = select_csv_rows(model, agate_table) -%} 24 | 25 | -- build model 26 | {% call statement('effective_main') -%} 27 | {{ create_table_as(False, target_relation, sql) }} 28 | {%- endcall %} 29 | 30 | {% call noop_statement('main', 'CREATE ' ~ num_rows, 'CREATE', num_rows) %} 31 | {{ sql }} 32 | {% endcall %} 33 | 34 | {{ refresh_metadata(target_relation, format) }} 35 | 36 | {{ apply_twin_strategy(target_relation) }} 37 | 38 | {% do persist_docs(target_relation, model) %} 39 | 40 | {{ run_hooks(post_hooks) }} 41 | 42 | {{ return({'relations': [target_relation]})}} 43 | 44 | {% endmaterialization %} 45 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/table/alter_pds.sql: -------------------------------------------------------------------------------- 1 | {# 2 | ALTER PDS REFRESH METADATA 3 | [AVOID PROMOTION | AUTO PROMOTION] 4 | [FORCE UPDATE | LAZY UPDATE] 5 | [MAINTAIN WHEN MISSING | DELETE WHEN MISSING] 6 | 7 | ALTER PDS FORGET METADATA 8 | 9 | ALTER TABLE REFRESH METADATA 10 | #} 11 | 12 | {% macro refresh_metadata(relation, format='iceberg') -%} 13 | {%- if format != 'iceberg' -%} 14 | {% call statement('refresh_metadata') -%} 15 | {%- if format == 'parquet' -%} 16 | {{ alter_table_refresh_metadata(relation) }} 17 | {%- else -%} 18 | {{ alter_pds(relation, avoid_promotion=false, lazy_update=false) }} 19 | {%- endif -%} 20 | {%- endcall %} 21 | {%- endif -%} 22 | {%- endmacro -%} 23 | 24 | {% macro alter_table_refresh_metadata(table_relation) -%} 25 | alter table {{ table_relation }} refresh metadata 26 | {%- endmacro -%} 27 | 28 | {% macro alter_pds(table_relation, avoid_promotion=True, lazy_update=True, delete_when_missing=True, forget_metadata=False) -%} 29 | alter pds {{ table_relation }} refresh metadata 30 | {% if forget_metadata %} 31 | forget metadata 32 | {%- else -%} 33 | {%- if avoid_promotion %} 34 | avoid promotion 35 | {%- else %} 36 | auto promotion 37 | {%- endif %} 38 | {%- if lazy_update %} 39 | lazy update 40 | {%- else %} 41 | force update 42 | {%- endif %} 43 | {%- if delete_when_missing %} 44 | delete when missing 45 | {%- else %} 46 | maintain when missing 47 | {%- endif -%} 48 | {%- endif %} 49 | {%- endmacro -%} 50 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/table/create_table_as.sql: -------------------------------------------------------------------------------- 1 | {# 2 | /** 3 | * Parses a CTAS statement. 4 | * CREATE TABLE tblname [ (field1, field2, ...) ] 5 | * [ (STRIPED, HASH, ROUNDROBIN) PARTITION BY (field1, field2, ..) ] 6 | * [ DISTRIBUTE BY (field1, field2, ..) ] 7 | * [ LOCALSORT BY (field1, field2, ..) ] 8 | * [ STORE AS (opt1 => val1, opt2 => val3, ...) ] 9 | * [ WITH SINGLE WRITER ] 10 | * [ AS select_statement. 
] 11 | */ 12 | #} 13 | 14 | {% macro dremio__create_table_as(temporary, relation, sql) -%} 15 | {%- set sql_header = config.get('sql_header', none) -%} 16 | 17 | {{ sql_header if sql_header is not none }} 18 | 19 | create table {{ relation }} 20 | {{ partition_method() }} {{ config_cols("partition by") }} 21 | {{ config_cols("distribute by") }} 22 | {{ config_cols("localsort by") }} 23 | {{ store_as_clause() }} 24 | {{ single_writer_clause() }} 25 | as ( 26 | {{ sql }} 27 | ) 28 | {%- endmacro -%} 29 | 30 | {% macro store_as_clause() -%} 31 | {%- set options = format_clause_from_config() -%} 32 | {%- if options is not none -%} 33 | store as ( {{ options }} ) 34 | {%- endif %} 35 | {%- endmacro -%} 36 | 37 | {%- macro single_writer_clause() -%} 38 | {%- set single_writer = config.get('single_writer', validator=validation.any[boolean]) -%} 39 | {%- if single_writer is not none and single_writer -%} 40 | with single writer 41 | {%- endif -%} 42 | {%- endmacro -%} 43 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/table/table.sql: -------------------------------------------------------------------------------- 1 | {% materialization table, adapter = 'dremio' %} 2 | 3 | {%- set identifier = model['alias'] -%} 4 | {%- set format = config.get('format', validator=validation.any[basestring]) or 'iceberg' -%} 5 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 6 | {%- set target_relation = api.Relation.create(identifier=identifier, 7 | schema=schema, 8 | database=database, 9 | type='table') -%} 10 | {{ run_hooks(pre_hooks) }} 11 | 12 | -- setup: if the target relation already exists, drop it 13 | -- in case if the existing and future table is delta, we want to do a 14 | -- create or replace table instead of dropping, so we don't have the table unavailable 15 | {% if old_relation is not none -%} 16 | {{ adapter.drop_relation(old_relation) }} 17 | {%- endif %} 18 | 19 | -- build model 20 | {% call statement('main') -%} 21 | {{ create_table_as(False, target_relation, external_query(sql)) }} 22 | {%- endcall %} 23 | 24 | {{ refresh_metadata(target_relation, format) }} 25 | 26 | {{ apply_twin_strategy(target_relation) }} 27 | 28 | {% do persist_docs(target_relation, model) %} 29 | 30 | {{ run_hooks(post_hooks) }} 31 | 32 | {{ return({'relations': [target_relation]})}} 33 | 34 | {% endmaterialization %} 35 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/twin_strategy.sql: -------------------------------------------------------------------------------- 1 | {%- macro apply_twin_strategy(target_relation) -%} 2 | {%- set twin_strategy = config.get('twin_strategy', validator=validation.any[basestring]) or 'clone' -%} 3 | {%- if target_relation.type == 'view' -%} 4 | {%- if twin_strategy != 'allow' -%} 5 | {%- set table_relation = api.Relation.create( 6 | identifier=generate_alias_name_impl(model.name, config.get('file', validator=validation.any[basestring]), model), 7 | schema=generate_schema_name_impl(target.root_path, config.get('root_path', validator=validation.any[basestring]), model), 8 | database=generate_database_name_impl(target.datalake, config.get('datalake', validator=validation.any[basestring]), model), 9 | type='table') -%} 10 | {{ adapter.drop_relation(table_relation) }} 11 | {%- endif -%} 12 | {%- elif target_relation.type == 'table' -%} 13 | {%- if twin_strategy in ['prevent', 
'clone'] -%} 14 | {%- set view_relation = api.Relation.create( 15 | identifier=generate_alias_name_impl(model.name, config.get('alias', validator=validation.any[basestring]), model), 16 | schema=generate_schema_name_impl(target.schema, config.get('schema', validator=validation.any[basestring]), model), 17 | database=generate_database_name_impl(target.database, config.get('database', validator=validation.any[basestring]), model), 18 | type='view') -%} 19 | {%- if twin_strategy == 'prevent' -%} 20 | {{ adapter.drop_relation(view_relation) }} 21 | {%- elif twin_strategy == 'clone' -%} 22 | {%- set sql_view -%} 23 | select * 24 | from {{ render_with_format_clause(target_relation) }} 25 | {%- endset -%} 26 | {% call statement('clone_view') -%} 27 | {{ create_view_as(view_relation, sql_view) }} 28 | {%- endcall %} 29 | {%- endif -%} 30 | {%- endif -%} 31 | {%- endif -%} 32 | {%- endmacro -%} 33 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/view/create_or_replace_view.sql: -------------------------------------------------------------------------------- 1 | {% macro create_or_replace_view() %} 2 | {%- set datalake = config.get('datalake', default=target.datalake) %} 3 | {%- set root_path = config.get('root_path', default=target.root_path) %} 4 | {%- set identifier = model['alias'] -%} 5 | {%- set file = config.get('file', default=identifier) %} 6 | 7 | {%- set target_view = api.Relation.create(database=database, schema=schema, identifier=identifier, type='view') -%} 8 | {%- set target_table = api.Relation.create(database=datalake, schema=root_path, identifier=file, type='table') -%} 9 | 10 | {{ run_hooks(pre_hooks) }} 11 | 12 | -- setup: in case the model was materialized before, drop the table 13 | {{ adapter.drop_relation(target_table) }} 14 | 15 | -- build model 16 | {% call statement('main') -%} 17 | {{ get_create_view_as_sql(target_view, sql) }} 18 | {%- endcall %} 19 | 20 | {{ run_hooks(post_hooks) }} 21 | 22 | {{ return({'relations': [target_view]}) }} 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/view/create_view_as.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__create_view_as(relation, sql) -%} 2 | {%- set sql_header = config.get('sql_header', none) -%} 3 | 4 | {{ sql_header if sql_header is not none }} 5 | 6 | create or replace view {{ relation }} as ( 7 | {{ sql }} 8 | ) 9 | {%- endmacro %} 10 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/view/helpers.sql: -------------------------------------------------------------------------------- 1 | {% macro dremio__handle_existing_table(full_refresh, old_relation) %} 2 | {{ log("Dropping relation " ~ old_relation ~ " because it is of type " ~ old_relation.type) }} 3 | {{ exceptions.raise_not_implemented('Inside a dremio home space, a model cannot change from table to view materialization; please drop the table in the UI') }} 4 | {% endmacro %} 5 | 6 | {# ALTER VDS SET ENABLE_DEFAULT_REFLECTION = TRUE | FALSE #} 7 | 8 | {% macro enable_default_reflection() %} 9 | {%- set enable_default_reflection = config.get('enable_default_reflection', validator=validation.any[boolean]) -%} 10 | {%- if enable_default_reflection is not none -%} 11 | {% call statement('enable_default_reflection') -%} 12 | alter vds {{ this }} set 
enable_default_reflection = {{ enable_default_reflection }} 13 | {%- endcall %} 14 | {%- endif -%} 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /dbt/include/dremio/macros/materializations/view/view.sql: -------------------------------------------------------------------------------- 1 | {% materialization view, adapter='dremio' %} 2 | {%- set identifier = model['alias'] -%} 3 | {%- set twin_strategy = config.get('twin_strategy', validator=validation.any[basestring]) or 'clone' -%} 4 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 5 | 6 | {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} 7 | 8 | {%- set target_relation = api.Relation.create( 9 | identifier=identifier, schema=schema, database=database, type='view') -%} 10 | 11 | {{ run_hooks(pre_hooks) }} 12 | 13 | -- If there's a table with the same name and we weren't told to full refresh, 14 | -- that's an error. If we were told to full refresh, drop it. This behavior differs 15 | -- for Snowflake and BigQuery, so multiple dispatch is used. 16 | {%- if old_relation is not none and old_relation.is_table -%} 17 | {{ handle_existing_table(should_full_refresh(), old_relation) }} 18 | {%- endif -%} 19 | 20 | -- build model 21 | {% call statement('main') -%} 22 | {{ create_view_as(target_relation, external_query(sql)) }} 23 | {%- endcall %} 24 | 25 | {{ apply_twin_strategy(target_relation) }} 26 | 27 | {{ enable_default_reflection() }} 28 | 29 | {{ run_hooks(post_hooks) }} 30 | 31 | {{ return({'relations': [target_relation]}) }} 32 | 33 | {% endmaterialization %} 34 | -------------------------------------------------------------------------------- /dbt/include/dremio/sample_profiles.yml: -------------------------------------------------------------------------------- 1 | default: 2 | outputs: 3 | unmanaged: 4 | type: dremio 5 | threads: 2 6 | driver: [driver] 7 | host: [host] 8 | port: 31010 9 | environment: [environment] 10 | database: [database] 11 | schema: [schema] 12 | user: [user] 13 | password: [password] 14 | managed: 15 | type: dremio 16 | threads: 2 17 | driver: [driver] 18 | host: [host] 19 | port: 31010 20 | environment: [environment] 21 | database: [database] 22 | schema: [schema] 23 | user: [user] 24 | password: [password] 25 | target: unmanaged 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | 5 | package_name = "dbt-dremio" 6 | package_version = "1.0.6.5" 7 | description = """The dremio adapter plugin for dbt (data build tool)""" 8 | 9 | setup( 10 | name=package_name, 11 | version=package_version, 12 | description=description, 13 | long_description=description, 14 | author="Fabrice Etanchaud", 15 | author_email="fabrice.etanchaud@netc.fr", 16 | url="https://github.com/fabrice-etanchaud/dbt-dremio", 17 | packages=find_packages(), 18 | package_data={ 19 | 'dbt': [ 20 | 'include/dremio/macros/*.sql', 21 | 'include/dremio/macros/**/*.sql', 22 | 'include/dremio/macros/**/**/*.sql', 23 | 'include/dremio/dbt_project.yml', 24 | ] 25 | }, 26 | install_requires=[ 27 | 'dbt-core==1.0.6', 28 | 'pyodbc>=4.0.27', 29 | ] 30 | ) 31 | --------------------------------------------------------------------------------
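The table materialization above is driven entirely by model-level config: format feeds refresh_metadata(), twin_strategy feeds apply_twin_strategy(), and single_writer feeds single_writer_clause() inside dremio__create_table_as(). As a minimal sketch only (the model and source names below are hypothetical, not taken from this repository), such a model might be declared like this:

{# models/stg_orders.sql — illustrative sketch, names are hypothetical.
   format='parquet' would make refresh_metadata() emit ALTER TABLE ... REFRESH METADATA,
   while the default 'iceberg' skips the metadata refresh entirely;
   twin_strategy='clone' makes apply_twin_strategy() recreate a view selecting from the new table;
   single_writer=true is rendered as WITH SINGLE WRITER by single_writer_clause(). #}
{{ config(
    materialized='table',
    format='iceberg',
    twin_strategy='clone',
    single_writer=true
) }}
select * from {{ source('raw', 'orders') }}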
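Similarly, the reflection materialization resolves its anchor from the model's single ref() or source() (or from an explicit anchor config when the model has several), and derives display, dimensions and measures from the anchor's columns when they are not given. A hypothetical aggregate reflection over the sketch above could look as follows (column names again illustrative only):

{# models/stg_orders_reflection.sql — illustrative sketch, names are hypothetical.
   The single ref() below becomes the anchor dataset; reflection_type is checked by
   dbt_dremio_validate_get_reflection_type(), and dimensions/measures are handed to
   create_reflection(). Omitting them falls back to column introspection on the anchor. #}
{{ config(
    materialized='reflection',
    reflection_type='aggregate',
    dimensions=['order_status'],
    measures=['order_total']
) }}
select * from {{ ref('stg_orders') }}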