├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── lint.yml │ ├── pypi.yml │ ├── test_cloud.yml │ └── test_matrix.yml ├── .gitignore ├── .yamllint ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── dbt ├── adapters │ └── clickhouse │ │ ├── __init__.py │ │ ├── __version__.py │ │ ├── cache.py │ │ ├── column.py │ │ ├── connections.py │ │ ├── credentials.py │ │ ├── dbclient.py │ │ ├── errors.py │ │ ├── httpclient.py │ │ ├── impl.py │ │ ├── logger.py │ │ ├── nativeclient.py │ │ ├── query.py │ │ ├── relation.py │ │ └── util.py └── include │ └── clickhouse │ ├── __init__.py │ ├── dbt_project.yml │ ├── macros │ ├── adapters.sql │ ├── adapters │ │ ├── apply_grants.sql │ │ └── relation.sql │ ├── catalog.sql │ ├── column_spec_ddl.sql │ ├── materializations │ │ ├── dictionary.sql │ │ ├── distributed_table.sql │ │ ├── incremental │ │ │ ├── distributed_incremental.sql │ │ │ ├── incremental.sql │ │ │ ├── is_incremental.sql │ │ │ └── schema_changes.sql │ │ ├── materialized_view.sql │ │ ├── s3.sql │ │ ├── seed.sql │ │ ├── snapshot.sql │ │ ├── table.sql │ │ └── view.sql │ ├── persist_docs.sql │ ├── schema_tests │ │ └── relationships.sql │ └── utils │ │ ├── datatypes.sql │ │ ├── timestamps.sql │ │ └── utils.sql │ └── sample_profiles.yml ├── dev_requirements.txt ├── etc └── chdbt.png ├── examples └── taxis │ ├── .gitignore │ ├── README.md │ ├── analyses │ └── .gitkeep │ ├── dbt_project.yml │ ├── macros │ └── .gitkeep │ ├── models │ ├── schema.yml │ ├── sources.yml │ ├── trips_inc.sql │ └── trips_rand.sql │ ├── seeds │ └── .gitkeep │ ├── snapshots │ └── .gitkeep │ └── tests │ └── .gitkeep ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── setup.py └── tests ├── conftest.py ├── integration ├── adapter │ ├── aliases │ │ └── test_aliases.py │ ├── basic │ │ ├── test_adapter_methods.py │ │ ├── test_base.py │ │ ├── test_basic.py │ │ ├── test_docs_generate.py │ │ ├── test_empty.py │ │ ├── test_ephemeral.py │ │ ├── test_generic_tests.py │ │ ├── test_incremental.py │ │ ├── test_singular_tests.py │ │ ├── test_singular_tests_ephemeral.py │ │ ├── test_snapshot_check_cols.py │ │ ├── test_snapshot_timestamp.py │ │ ├── test_table_materialization.py │ │ └── test_validate_connection.py │ ├── caching │ │ └── test_caching.py │ ├── clickhouse │ │ ├── test_clickhouse_comments.py │ │ ├── test_clickhouse_errors.py │ │ ├── test_clickhouse_s3.py │ │ ├── test_clickhouse_source_schema.py │ │ ├── test_clickhouse_sql_header.py │ │ ├── test_clickhouse_table_materializations.py │ │ ├── test_clickhouse_table_ttl.py │ │ └── test_clickhouse_upper_case.py │ ├── column_types │ │ └── test_column_types.py │ ├── concurrency │ │ └── test_concurrency.py │ ├── constraints │ │ ├── fixtures_constraints.py │ │ └── test_constraints.py │ ├── dbt_clone │ │ └── test_dbt_clone.py │ ├── dbt_debug │ │ └── test_dbt_debug.py │ ├── dbt_show │ │ └── test_dbt_show.py │ ├── dictionary │ │ └── test_dictionary.py │ ├── grants │ │ ├── test_distributed_grants.py │ │ ├── test_incremental_grants.py │ │ ├── test_invalid_grants.py │ │ ├── test_model_grants.py │ │ ├── test_seed_grants.py │ │ └── test_snapshot_grants.py │ ├── hooks │ │ └── test_model_hooks.py │ ├── incremental │ │ ├── test_base_incremental.py │ │ ├── test_distributed_incremental.py │ │ ├── test_incremental_microbatch.py │ │ ├── test_incremental_predicates.py │ │ ├── test_incremental_unique_key.py │ │ └── test_schema_change.py │ ├── materialized_view │ │ ├── test_materialized_view.py │ │ ├── 
test_multiple_materialized_views.py │ │ └── test_refreshable_materialized_view.py │ ├── persist_docs │ │ ├── fixtures.py │ │ └── test_persist_docs.py │ ├── projections │ │ └── test_projections.py │ ├── query_comment │ │ └── test_query_comment.py │ ├── query_settings │ │ └── test_query_settings.py │ ├── relations │ │ └── test_changing_relation_type.py │ ├── replicated_database │ │ └── test_replicated_database.py │ ├── utils │ │ ├── test_array.py │ │ ├── test_datatypes.py │ │ ├── test_dateadd.py │ │ ├── test_datediff.py │ │ ├── test_last_day.py │ │ ├── test_listagg.py │ │ ├── test_replace.py │ │ ├── test_split_part.py │ │ ├── test_timestamps.py │ │ └── test_unchanged.py │ └── view │ │ ├── test_view.py │ │ └── test_view_sql_security.py ├── conftest.py ├── docker-compose.yml ├── test_config.xml ├── test_settings_22_3.xml └── test_settings_latest.xml └── unit └── test_util.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore =
3 |     E731,
4 |     E203,  # black and flake8 disagree on whitespace before ':'
5 |     E501,  # line too long (> 79 characters)
6 |     W503,  # black and flake8 disagree on how to place operators
7 | 
8 | per-file-ignores =
9 |     # imported but unused
10 |     __init__.py: F401
11 | 
12 | max-line-length = 100
13 | max-complexity = 18
14 | exclude = .venv/,venv/
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Something not working right? Help us fix it!
4 | title: ''
5 | labels: 'bug'
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | 
11 | ### Describe the bug
12 | 
13 | ### Steps to reproduce
14 | 1.
15 | 2.
16 | 3.
17 | 
18 | ### Expected behaviour
19 | 
20 | ### Code examples, such as models or profile settings
21 | 
22 | ### dbt and/or ClickHouse server logs
23 | 
24 | ### Configuration
25 | #### Environment
26 | * dbt version:
27 | * dbt-clickhouse version:
28 | * clickhouse-driver version (if using native):
29 | * clickhouse-connect version (if using http):
30 | * Python version:
31 | * Operating system:
32 | 
33 | 
34 | #### ClickHouse server
35 | * ClickHouse Server version:
36 | * ClickHouse Server non-default settings, if any:
37 | * `CREATE TABLE` statements for tables involved:
38 | * Sample data for these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
39 | 
40 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: What would you like to add to dbt-clickhouse?
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | 4 | ## Checklist 5 | Delete items not relevant to your PR: 6 | - [ ] Unit and integration tests covering the common scenarios were added 7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG 8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials 9 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "lint" 3 | 4 | on: # yamllint disable-line rule:truthy 5 | pull_request: 6 | push: 7 | branches-ignore: 8 | - '*_test' 9 | - '*_dev' 10 | - '*_cloud' 11 | 12 | jobs: 13 | lint: 14 | name: Lint 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup Python 3.11 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: 3.11 25 | 26 | - name: Upgrade Setuptools 27 | run: pip install --upgrade setuptools wheel 28 | 29 | - name: Install requirements 30 | run: pip install -r dev_requirements.txt 31 | 32 | - name: Run lint 33 | run: make lint 34 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "PyPI Release" 3 | 4 | # yamllint disable-line rule:truthy 5 | on: 6 | push: 7 | tags: 8 | - 'v*' 9 | workflow_dispatch: 10 | 11 | 12 | jobs: 13 | publish: 14 | name: PyPI Release 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.11" 25 | 26 | - name: Upgrade Setuptools 27 | run: pip install --upgrade setuptools wheel 28 | 29 | - name: Build Distribution 30 | run: python setup.py sdist bdist_wheel --universal 31 | 32 | - name: Publish to PyPI 33 | uses: pypa/gh-action-pypi-publish@release/v1 34 | with: 35 | user: __token__ 36 | password: ${{ secrets.pypi_password }} 37 | -------------------------------------------------------------------------------- /.github/workflows/test_cloud.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "test_cloud" 3 | 4 | on: # yamllint disable-line rule:truthy 5 | push: 6 | branches: 7 | - '*_cloud' 8 | workflow_dispatch: 9 | 10 | jobs: 11 | cloud_smt_tests: 12 | name: ClickHouse Cloud SharedMergeTree Tests 13 | runs-on: ubuntu-latest 14 | 15 | env: 16 | PYTHONPATH: dbt 17 | DBT_CH_TEST_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }} 18 | DBT_CH_TEST_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }} 19 | DBT_CH_TEST_CLUSTER_MODE: true 20 | DBT_CH_TEST_CLOUD: true 21 | 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v3 25 | 26 | - name: Setup Python 3.11 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: '3.11' 30 | 31 | - name: Install requirements 32 | run: pip3 install -r dev_requirements.txt 33 | 34 | - name: Run HTTP tests 35 | env: 36 | DBT_CH_TEST_PORT: 8443 37 | run: pytest tests 38 | 39 | - name: Run Native tests 40 | env: 41 | DBT_CH_TEST_PORT: 9440 42 | run: pytest tests 43 | 
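44 | # Note on the ports above: 8443 is ClickHouse Cloud's secure HTTPS port, exercising
45 | # the clickhouse-connect HTTP client; 9440 is the secure native TCP port, exercising
46 | # the clickhouse-driver native client.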
-------------------------------------------------------------------------------- /.github/workflows/test_matrix.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "test_matrix" 3 | 4 | on: # yamllint disable-line rule:truthy 5 | pull_request: 6 | branches: main 7 | push: 8 | branches-ignore: 9 | - '*_test' 10 | - '*_dev' 11 | - '*_cloud' 12 | paths-ignore: 13 | - '**.md' 14 | - 'LICENSE' 15 | workflow_dispatch: 16 | 17 | jobs: 18 | tests: 19 | name: Python ${{ matrix.python-version }} | Clickhouse ${{ matrix.clickhouse-version}} | Ubuntu 20 | runs-on: ubuntu-latest 21 | env: 22 | TEST_SETTINGS_FILE: latest 23 | 24 | strategy: 25 | matrix: 26 | python-version: 27 | - '3.9' 28 | - '3.10' 29 | - '3.11' 30 | - '3.12' 31 | clickhouse-version: 32 | - '23.8' 33 | - '24.1' 34 | - '24.2' 35 | - '24.3' 36 | - latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Set environment variables 43 | if: ${{ matrix.clickhouse-version == '22.3' }} 44 | run: | 45 | echo "TEST_SETTINGS_FILE=22_3" >> $GITHUB_ENV 46 | echo "DBT_CH_TEST_CH_VERSION=22.3" >> $GITHUB_ENV 47 | 48 | # a fix until docker compose v2.36.0 will be the default version in the github runner 49 | - name: Install Docker Compose v2.36.0 50 | run: | 51 | sudo mkdir -p /usr/local/lib/docker/cli-plugins 52 | sudo curl -L "https://github.com/docker/compose/releases/download/v2.36.0/docker-compose-linux-x86_64" -o /usr/local/lib/docker/cli-plugins/docker-compose 53 | sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-compose 54 | docker compose version 55 | 56 | - name: Run ClickHouse Cluster Containers 57 | env: 58 | PROJECT_ROOT: ${{ github.workspace }}/tests/integration 59 | run: REPLICA_NUM=1 docker compose -f ${{ github.workspace }}/tests/integration/docker-compose.yml up -d 60 | 61 | - name: Setup Python ${{ matrix.python-version }} 62 | uses: actions/setup-python@v5 63 | with: 64 | python-version: ${{ matrix.python-version }} 65 | 66 | - name: Install requirements 67 | run: pip3 install -r dev_requirements.txt 68 | 69 | - name: Run HTTP tests 70 | env: 71 | DBT_CH_TEST_CLUSTER: test_shard 72 | run: | 73 | PYTHONPATH="${PYTHONPATH}:dbt" 74 | pytest tests 75 | 76 | - name: Run Native tests 77 | env: 78 | DBT_CH_TEST_PORT: 9000 79 | DBT_CH_TEST_CLUSTER: test_shard 80 | run: | 81 | PYTHONPATH="${PYTHONPATH}:dbt" 82 | pytest tests 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env*/ 12 | venv*/ 13 | dbt_env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | *.mypy_cache/ 29 | logs/ 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest
35 | *.spec
36 | 
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 | 
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | .env
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 | .hypothesis/
52 | test.env
53 | 
54 | # Mypy
55 | .mypy_cache/
56 | 
57 | # Translations
58 | *.mo
59 | *.pot
60 | 
61 | # Django stuff:
62 | *.log
63 | 
64 | # Sphinx documentation
65 | docs/_build/
66 | 
67 | # PyBuilder
68 | target/
69 | 
70 | # IPython Notebook
71 | .ipynb_checkpoints
72 | 
73 | # Emacs
74 | *~
75 | 
76 | # Sublime Text
77 | *.sublime-*
78 | 
79 | # Vim
80 | *.sw*
81 | 
82 | # pycharm
83 | .idea/
84 | 
85 | # AWS credentials
86 | .aws/
87 | 
88 | .DS_Store
89 | 
90 | # vscode
91 | .vscode/
92 | 
93 | # self
94 | dbt-tut
95 | 
96 | # local development stuff
97 | dev/
98 | .python-version
99 | *_project/
100 | 
--------------------------------------------------------------------------------
/.yamllint:
--------------------------------------------------------------------------------
1 | ---
2 | extends: default
3 | 
4 | rules:
5 |   line-length: disable
6 | 
7 | ignore: |
8 |   *.venv/
9 |   *.mypy_cache/
10 |   *.eggs/
11 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guide for dbt-ClickHouse Adapter
2 | 
3 | ## Introduction
4 | 
5 | Thank you for considering contributing to the dbt-ClickHouse adapter! We value your contributions and appreciate your
6 | efforts to improve the adapter. This guide will help you get started with the contribution process.
7 | 
8 | ## Getting Started
9 | 
10 | ### 1. Fork the Repository
11 | 
12 | Start by forking the repository on GitHub. This will create a copy of the repository under your own GitHub account.
13 | 
14 | ### 2. Set Up Environment
15 | 
16 | * Make sure Python is installed locally; please refer to [dbt's Python compatibility](https://docs.getdbt.com/faqs/Core/install-python-compatibility) (we recommend version 3.12+).
17 | * Create a dedicated virtual environment (optional but recommended).
18 | * Install all the development requirements:
19 | ```bash
20 | pip install -r dev_requirements.txt
21 | ```
22 | * Install the local project as a package - go to the project root folder and run
23 | ```bash
24 | pip install .
25 | ```
26 | * Verify the package was installed successfully:
27 | ```bash
28 | pip list | grep dbt-clickhouse
29 | ```
30 | The listed package should point to your local project.
31 | 
32 | ### 3. Create a Branch
33 | 
34 | Create a new branch for your feature or bug fix:
35 | 
36 | ```bash
37 | git checkout -b my-new-feature
38 | ```
39 | 
40 | ### 4. Make Your Changes
41 | 
42 | Make the necessary changes in your branch. Ensure that your code follows the existing style and conventions used in the
43 | project.
44 | We strongly recommend sticking to the official [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) message conventions.
45 | 
46 | ### 5. Add or Adjust Tests
47 | 
48 | The project tests are located under the `tests` folder. Please look for the relevant test associated with
49 | your changes and adjust it. If no such test exists, please create one.
50 | 
51 | ### 6. Run The Tests
52 | 
53 | See [Running Tests](#running-tests) for more information.
54 | 
55 | > **Important:** Please make sure the tests pass before pushing your code.
56 | 
57 | ### 7. Create a PR
58 | Create a pull request from your forked repository to the main one, and include the following:
59 | * If this is your first contribution, make sure to sign ClickHouse's CLA.
60 | * Link the related issue to your PR.
61 | * Add a sensible description of the feature/issue and detail the use case.
62 | * Make sure to update [CHANGELOG.md](CHANGELOG.md).
63 | 
64 | 
65 | # Running Tests
66 | 
67 | This adapter passes all of dbt's basic tests as presented in dbt's [official docs](https://docs.getdbt.com/docs/contributing/testing-a-new-adapter#testing-your-adapter).
68 | Use `pytest tests` to run tests.
69 | 
70 | You can customize the test environment via environment variables. We recommend doing so with the `pytest-dotenv` plugin combined with a root-level `test.env`
71 | configuration file (this file should not be checked into git). The following environment variables are recognized:
72 | 
73 | 1. DBT_CH_TEST_HOST - Default=`localhost`
74 | 2. DBT_CH_TEST_USER - your ClickHouse username. Default=`default`
75 | 3. DBT_CH_TEST_PASSWORD - your ClickHouse password. Default=''
76 | 4. DBT_CH_TEST_PORT - ClickHouse client port. Default=8123 (the default is automatically changed to the correct port if DBT_CH_TEST_USE_DOCKER is enabled)
77 | 5. DBT_CH_TEST_DB_ENGINE - Database engine used to create schemas. Defaults to '' (server default)
78 | 6. DBT_CH_TEST_USE_DOCKER - Set to True to run the clickhouse-server docker image (see tests/docker-compose.yml). Requires docker-compose. Default=False
79 | 7. DBT_CH_TEST_CH_VERSION - ClickHouse docker image to use. Defaults to `latest`
80 | 8. DBT_CH_TEST_INCLUDE_S3 - Include S3 tests. Default=False since these are currently dependent on a specific ClickHouse S3 bucket/test dataset
81 | 9. DBT_CH_TEST_CLUSTER_MODE - Sets the `cluster_mode` profile value. Default=False
82 | 10. DBT_CH_TEST_CLUSTER - ClickHouse cluster name. If DBT_CH_TEST_USE_DOCKER is set to true, only `test_replica` and `test_shard` are valid (see tests/test_config.xml for cluster settings)
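83 | 
84 | For example, a minimal root-level `test.env` for running the suite against the Docker cluster might look like this (the values below are illustrative, not required settings):
85 | 
86 | ```bash
87 | DBT_CH_TEST_USE_DOCKER=True
88 | DBT_CH_TEST_USER=default
89 | DBT_CH_TEST_PASSWORD=
90 | DBT_CH_TEST_CLUSTER=test_shard
91 | ```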
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Check style and linting
2 | .PHONY: check-black check-isort check-mypy check-yamllint lint
3 | 
4 | check-black:
5 | 	@echo "--> Running black checks"
6 | 	@black --check --diff --exclude=venv .
7 | 
8 | check-isort:
9 | 	@echo "--> Running isort checks"
10 | 	@isort --check-only .
11 | 
12 | check-mypy:
13 | 	@echo "--> Running mypy checks"
14 | 	@mypy --exclude dbt/adapters/clickhouse/__init__.py --exclude conftest.py .
15 | 
16 | check-yamllint:
17 | 	@echo "--> Running yamllint checks"
18 | 	@yamllint dbt tests .github
19 | 
20 | lint: check-black check-isort check-mypy check-yamllint
21 | 
22 | # Format code
23 | .PHONY: fmt
24 | 
25 | fmt:
26 | 	@echo "--> Running isort"
27 | 	@isort .
28 | 	@echo "--> Running black"
29 | 	@black .
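30 | 
31 | # Convenience target (a sketch, not part of the original Makefile): run the test
32 | # suite described in CONTRIBUTING.md. Assumes dev_requirements.txt is installed and
33 | # any DBT_CH_TEST_* settings are supplied via a root-level test.env file.
34 | .PHONY: test
35 | test:
36 | 	@pytest tests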
37 | 
--------------------------------------------------------------------------------
/dbt/adapters/clickhouse/__init__.py:
--------------------------------------------------------------------------------
1 | from dbt.adapters.base import AdapterPlugin
2 | 
3 | from dbt.adapters.clickhouse.column import ClickHouseColumn  # noqa
4 | from dbt.adapters.clickhouse.connections import ClickHouseConnectionManager  # noqa
5 | from dbt.adapters.clickhouse.credentials import ClickHouseCredentials
6 | from dbt.adapters.clickhouse.impl import ClickHouseAdapter
7 | from dbt.adapters.clickhouse.relation import ClickHouseRelation  # noqa
8 | from dbt.include import clickhouse  # noqa
9 | 
10 | Plugin = AdapterPlugin(
11 |     adapter=ClickHouseAdapter,
12 |     credentials=ClickHouseCredentials,
13 |     include_path=clickhouse.PACKAGE_PATH,
14 | )
15 | 
--------------------------------------------------------------------------------
/dbt/adapters/clickhouse/__version__.py:
--------------------------------------------------------------------------------
1 | version = '1.9.2'
2 | 
--------------------------------------------------------------------------------
/dbt/adapters/clickhouse/column.py:
--------------------------------------------------------------------------------
1 | import re
2 | from dataclasses import dataclass, field
3 | from typing import Any, List, Literal, TypeVar
4 | 
5 | from dbt.adapters.base.column import Column
6 | from dbt_common.exceptions import DbtRuntimeError
7 | 
8 | Self = TypeVar('Self', bound='ClickHouseColumn')
9 | 
10 | 
11 | @dataclass
12 | class ClickHouseColumn(Column):
13 |     TYPE_LABELS = {
14 |         'STRING': 'String',
15 |         'TIMESTAMP': 'DateTime',
16 |         'FLOAT': 'Float32',
17 |         'INTEGER': 'Int32',
18 |     }
19 |     is_nullable: bool = False
20 |     is_low_cardinality: bool = False
21 |     _low_card_regex = re.compile(r'^LowCardinality\((.*)\)$')
22 |     _nullable_regex = re.compile(r'^Nullable\((.*)\)$')
23 |     _fix_size_regex = re.compile(r'FixedString\((.*?)\)')
24 |     _decimal_regex = re.compile(r'Decimal\((\d+), (\d+)\)')
25 | 
26 |     def __init__(self, column: str, dtype: str) -> None:
27 |         char_size = None
28 |         numeric_precision = None
29 |         numeric_scale = None
30 | 
31 |         dtype = self._inner_dtype(dtype)
32 | 
33 |         if dtype.lower().startswith('fixedstring'):
34 |             match_sized = self._fix_size_regex.search(dtype)
35 |             if match_sized:
36 |                 char_size = int(match_sized.group(1))
37 | 
38 |         if dtype.lower().startswith('decimal'):
39 |             match_dec = self._decimal_regex.search(dtype)
40 |             numeric_precision = 0
41 |             numeric_scale = 0
42 |             if match_dec:
43 |                 numeric_precision = int(match_dec.group(1))
44 |                 numeric_scale = int(match_dec.group(2))
45 | 
46 |         super().__init__(column, dtype, char_size, numeric_precision, numeric_scale)
47 | 
48 |     def __repr__(self) -> str:
49 |         return f'<ClickHouseColumn {self.name} ({self.data_type}, is nullable: {self.is_nullable})>'
50 | 
51 |     @property
52 |     def data_type(self) -> str:
53 |         if self.is_string():
54 |             data_t = self.string_type(self.string_size())
55 |         elif self.is_numeric():
56 |             data_t = self.numeric_type(self.dtype, self.numeric_precision, self.numeric_scale)
57 |         else:
58 |             data_t = self.dtype
59 | 
60 |         if self.is_nullable or self.is_low_cardinality:
61 |             data_t = self.nested_type(data_t, self.is_low_cardinality, self.is_nullable)
62 | 
63 |         return data_t
64 | 
65 |     def is_string(self) -> bool:
66 |         return self.dtype.lower() in [
67 |             'string',
68 |             'fixedstring',
69 |             'longblob',
70 |             'longtext',
71 |             'tinytext',
72 |             'text',
73 |             'varchar',
74 |             'mediumblob',
75 |             'blob',
76 |             'tinyblob',
77 |             'char',
78 |             'mediumtext',
79 |         ] or self.dtype.lower().startswith('fixedstring')
80 | 
81 |     def is_integer(self) -> bool:
82 |         return self.dtype.lower().startswith('int') or self.dtype.lower().startswith('uint')
83 | 
84 |     def is_numeric(self) -> bool:
85 |         return self.dtype.lower().startswith('decimal')
86 | 
87 |     def is_float(self) -> bool:
88 |         return self.dtype.lower().startswith('float')
89 | 
90 |     def string_size(self) -> int:
91 |         if not self.is_string():
92 |             raise DbtRuntimeError('Called string_size() on non-string field!')
93 | 
94 |         if not self.dtype.lower().startswith('fixedstring') or self.char_size is None:
95 |             return 256
96 |         else:
97 |             return int(self.char_size)
98 | 
99 |     @classmethod
100 |     def string_type(cls, size: int) -> str:
101 |         return 'String'
102 | 
103 |     @classmethod
104 |     def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str:
105 |         return f'Decimal({precision}, {scale})'
106 | 
107 |     @classmethod
108 |     def nested_type(cls, dtype: str, is_low_cardinality: bool, is_nullable: bool) -> str:
109 |         template = "{}"
110 |         if is_low_cardinality:
111 |             template = template.format("LowCardinality({})")
112 |         if is_nullable:
113 |             template = template.format("Nullable({})")
114 |         return template.format(dtype)
115 | 
116 |     def literal(self, value):
117 |         return f'to{self.dtype}({value})'
118 | 
119 |     def can_expand_to(self, other_column: 'Column') -> bool:
120 |         if not self.is_string() or not other_column.is_string():
121 |             return False
122 | 
123 |         return other_column.string_size() > self.string_size()
124 | 
125 |     def _inner_dtype(self, dtype) -> str:
126 |         inner_dtype = dtype.strip()
127 | 
128 |         if low_card_match := self._low_card_regex.search(inner_dtype):
129 |             self.is_low_cardinality = True
130 |             inner_dtype = low_card_match.group(1)
131 | 
132 |         if null_match := self._nullable_regex.search(inner_dtype):
133 |             self.is_nullable = True
134 |             inner_dtype = null_match.group(1)
135 | 
136 |         return inner_dtype
137 | 
138 | 
139 | @dataclass(frozen=True)
140 | class ClickHouseColumnChanges:
141 |     on_schema_change: Literal['ignore', 'fail', 'append_new_columns', 'sync_all_columns']
142 |     columns_to_add: List[Column] = field(default_factory=list)
143 |     columns_to_drop: List[Column] = field(default_factory=list)
144 |     columns_to_modify: List[Column] = field(default_factory=list)
145 | 
146 |     def __bool__(self) -> bool:
147 |         return bool(self.columns_to_add or self.columns_to_drop or self.columns_to_modify)
148 | 
149 |     @property
150 |     def has_schema_changes(self) -> bool:
151 |         return bool(self)
152 | 
153 |     @property
154 |     def has_sync_changes(self) -> bool:
155 |         return bool(self.columns_to_drop or self.columns_to_modify)
156 | 
157 |     @property
158 |     def has_conflicting_changes(self) -> bool:
159 |         if self.on_schema_change == 'fail' and self.has_schema_changes:
160 |             return True
161 | 
162 |         if self.on_schema_change != 'sync_all_columns' and self.has_sync_changes:
163 |             return True
164 | 
165 |         return False
166 | 
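167 | # Example (for illustration): ClickHouseColumn('amount', 'LowCardinality(Nullable(Decimal(10, 2)))')
168 | # unwraps to dtype 'Decimal(10, 2)' with is_low_cardinality=True and is_nullable=True;
169 | # its data_type property then re-wraps it as 'LowCardinality(Nullable(Decimal(10, 2)))'.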
--------------------------------------------------------------------------------
/dbt/adapters/clickhouse/connections.py:
--------------------------------------------------------------------------------
1 | import re
2 | import time
3 | from contextlib import contextmanager
4 | from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
5 | 
6 | import dbt.exceptions
7 | from dbt.adapters.contracts.connection import AdapterResponse, Connection
8 | from dbt.adapters.sql import SQLConnectionManager
9 | 
10 | from dbt.adapters.clickhouse.dbclient import ChRetryableException, get_db_client
11 | from
dbt.adapters.clickhouse.logger import logger 12 | 13 | if TYPE_CHECKING: 14 | import agate 15 | 16 | retryable_exceptions = [ChRetryableException] 17 | ddl_re = re.compile(r'^\s*(CREATE|DROP|ALTER)\s', re.IGNORECASE) 18 | 19 | 20 | class ClickHouseConnectionManager(SQLConnectionManager): 21 | """ 22 | ClickHouse Connector connection manager. 23 | """ 24 | 25 | TYPE = 'clickhouse' 26 | 27 | @contextmanager 28 | def exception_handler(self, sql): 29 | try: 30 | yield 31 | except Exception as exp: 32 | logger.debug('Error running SQL: {}', sql) 33 | if isinstance(exp, dbt.exceptions.DbtRuntimeError): 34 | raise 35 | raise dbt.exceptions.DbtRuntimeError('ClickHouse exception: ' + str(exp)) from exp 36 | 37 | @classmethod 38 | def open(cls, connection): 39 | if connection.state == 'open': 40 | logger.debug('Connection is already open, skipping open.') 41 | return connection 42 | credentials = cls.get_credentials(connection.credentials) 43 | 44 | def connect(): 45 | return get_db_client(credentials) 46 | 47 | return cls.retry_connection( 48 | connection, 49 | connect=connect, 50 | logger=logger, 51 | retry_limit=credentials.retries, 52 | retryable_exceptions=retryable_exceptions, 53 | ) 54 | 55 | def cancel(self, connection): 56 | connection_name = connection.name 57 | logger.debug('Cancelling query \'{}\'', connection_name) 58 | connection.handle.close() 59 | logger.debug('Cancel query \'{}\'', connection_name) 60 | 61 | def release(self): 62 | pass # There is no "release" type functionality in the existing ClickHouse connectors 63 | 64 | @classmethod 65 | def get_table_from_response(cls, response, column_names) -> "agate.Table": 66 | """ 67 | Build agate table from response. 68 | :param response: ClickHouse query result 69 | :param column_names: Table column names 70 | """ 71 | from dbt_common.clients.agate_helper import table_from_data_flat 72 | 73 | data = [] 74 | for row in response: 75 | data.append(dict(zip(column_names, row))) 76 | 77 | return table_from_data_flat(data, column_names) 78 | 79 | def execute( 80 | self, sql: str, auto_begin: bool = False, fetch: bool = False, limit: Optional[int] = None 81 | ) -> Tuple[AdapterResponse, "agate.Table"]: 82 | # Don't try to fetch result of clustered DDL responses, we don't know what to do with them 83 | if fetch and ddl_re.match(sql): 84 | fetch = False 85 | 86 | sql = self._add_query_comment(sql) 87 | conn = self.get_thread_connection() 88 | client = conn.handle 89 | 90 | with self.exception_handler(sql): 91 | logger.debug(f'On {conn.name}: {sql}...') 92 | pre = time.time() 93 | if fetch: 94 | query_result = client.query(sql) 95 | else: 96 | query_result = client.command(sql) 97 | status = self.get_status(client) 98 | logger.debug(f'SQL status: {status} in {(time.time() - pre):.2f} seconds') 99 | if fetch: 100 | table = self.get_table_from_response( 101 | query_result.result_set, query_result.column_names 102 | ) 103 | else: 104 | from dbt_common.clients.agate_helper import empty_table 105 | 106 | table = empty_table() 107 | return AdapterResponse(_message=status), table 108 | 109 | def add_query( 110 | self, 111 | sql: str, 112 | auto_begin: bool = True, 113 | bindings: Optional[Any] = None, 114 | abridge_sql_log: bool = False, 115 | ) -> Tuple[Connection, Any]: 116 | sql = self._add_query_comment(sql) 117 | conn = self.get_thread_connection() 118 | client = conn.handle 119 | with self.exception_handler(sql): 120 | logger.debug(f'On {conn.name}: {sql}...') 121 | pre = time.time() 122 | client.command(sql) 123 | status = 
self.get_status(client) 124 | logger.debug(f'SQL status: {status} in {(time.time() - pre):0.2f} seconds') 125 | return conn, None 126 | 127 | @classmethod 128 | def get_credentials(cls, credentials): 129 | """ 130 | Returns ClickHouse credentials 131 | """ 132 | return credentials 133 | 134 | @classmethod 135 | def get_status(cls, _): 136 | """ 137 | Returns connection status 138 | """ 139 | return 'OK' 140 | 141 | @classmethod 142 | def get_response(cls, _): 143 | return 'OK' 144 | 145 | def begin(self): 146 | pass 147 | 148 | def commit(self): 149 | pass 150 | 151 | @classmethod 152 | def data_type_code_to_name(cls, type_code: Union[int, str]) -> str: 153 | assert isinstance(type_code, int) 154 | return '' 155 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/credentials.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Dict, Optional, Union 3 | 4 | from dbt.adapters.contracts.connection import Credentials 5 | from dbt_common.exceptions import DbtRuntimeError 6 | 7 | 8 | @dataclass 9 | class ClickHouseCredentials(Credentials): 10 | """ 11 | ClickHouse connection credentials data class. 12 | """ 13 | 14 | driver: Optional[str] = None 15 | host: str = 'localhost' 16 | port: Optional[int] = None 17 | user: Optional[str] = 'default' 18 | retries: int = 1 19 | database: Optional[str] = '' 20 | schema: Optional[str] = 'default' 21 | password: str = '' 22 | cluster: Optional[str] = None 23 | database_engine: Optional[str] = None 24 | cluster_mode: bool = False 25 | secure: bool = False 26 | verify: bool = True 27 | client_cert: Optional[str] = None 28 | client_cert_key: Optional[str] = None 29 | connect_timeout: int = 10 30 | send_receive_timeout: int = 300 31 | sync_request_timeout: int = 5 32 | compress_block_size: int = 1048576 33 | compression: str = '' 34 | check_exchange: bool = True 35 | custom_settings: Optional[Dict[str, Any]] = None 36 | use_lw_deletes: bool = False 37 | local_suffix: str = 'local' 38 | local_db_prefix: str = '' 39 | allow_automatic_deduplication: bool = False 40 | tcp_keepalive: Union[bool, tuple[int, int, int], list[int]] = False 41 | 42 | @property 43 | def type(self): 44 | return 'clickhouse' 45 | 46 | @property 47 | def unique_field(self): 48 | return self.host 49 | 50 | def __post_init__(self): 51 | if self.database and self.database != self.schema: 52 | raise DbtRuntimeError( 53 | f' schema: {self.schema} \n' 54 | f' database: {self.database} \n' 55 | f' cluster: {self.cluster} \n' 56 | f'On Clickhouse, database must be omitted or have the same value as' 57 | f' schema.' 
58 |         )
59 |         self.database = ''
60 | 
61 |         # clickhouse_driver expects tcp_keepalive to be a tuple if it's not a boolean
62 |         if isinstance(self.tcp_keepalive, list):
63 |             self.tcp_keepalive = tuple(self.tcp_keepalive)
64 | 
65 |     def _connection_keys(self):
66 |         return (
67 |             'driver',
68 |             'host',
69 |             'port',
70 |             'user',
71 |             'schema',
72 |             'retries',
73 |             'cluster',
74 |             'database_engine',
75 |             'cluster_mode',
76 |             'secure',
77 |             'verify',
78 |             'client_cert',
79 |             'client_cert_key',
80 |             'connect_timeout',
81 |             'send_receive_timeout',
82 |             'sync_request_timeout',
83 |             'compress_block_size',
84 |             'compression',
85 |             'check_exchange',
86 |             'custom_settings',
87 |             'use_lw_deletes',
88 |             'allow_automatic_deduplication',
89 |             'tcp_keepalive',
90 |         )
91 | 
--------------------------------------------------------------------------------
/dbt/adapters/clickhouse/errors.py:
--------------------------------------------------------------------------------
1 | schema_change_fail_error = """
2 | The source and target schemas on this incremental model are out of sync.
3 | They can be reconciled in several ways:
4 | - Set the `on_schema_change` config to `append_new_columns` or `sync_all_columns`.
5 | - Re-run the incremental model with `full_refresh: True` to update the target schema.
6 | - Update the schema manually and re-run the process.
7 | 
8 | Additional troubleshooting context:
9 | Source columns not in target: {0}
10 | Target columns not in source: {1}
11 | New column types: {2}
12 | """
13 | 
14 | schema_change_datatype_error = """
15 | The source and target schemas on this incremental model contain different data types. This is not supported.
16 | 
17 | Changed column types: {0}
18 | """
19 | 
20 | schema_change_missing_source_error = """
21 | The target schema on this incremental model contains a column not in the source schema. This is not supported.
22 | 
23 | Source columns not in target: {0}
24 | """
25 | 
26 | lw_deletes_not_enabled_error = """
27 | Attempting to apply the configuration `use_lw_deletes` to enable the delete+insert incremental strategy, but
28 | `lightweight deletes` are either not available or not enabled on this ClickHouse server.
29 | """
30 | 
31 | lw_deletes_not_enabled_warning = """
32 | `lightweight deletes` are either not available or not enabled on this ClickHouse server. This prevents the use
33 | of the delete+insert incremental strategy, which may negatively affect performance for incremental models.
34 | """
35 | 
36 | nd_mutations_not_enabled_error = """
37 | Attempting to apply the configuration `use_lw_deletes` to enable the delete+insert incremental strategy, but
38 | the required `allow_nondeterministic_mutations` setting is not enabled and is `read_only` for this user.
39 | """
40 | 
41 | nd_mutations_not_enabled_warning = """
42 | The setting `allow_nondeterministic_mutations` is not enabled and is `read_only` for this user. This prevents the use
43 | of `lightweight deletes` and therefore the delete+insert incremental strategy.
This may negatively affect performance 44 | for incremental models 45 | """ 46 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/httpclient.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import clickhouse_connect 4 | from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError 5 | from dbt.adapters.__about__ import version as dbt_adapters_version 6 | from dbt_common.exceptions import DbtDatabaseError 7 | 8 | from dbt.adapters.clickhouse import ClickHouseColumn 9 | from dbt.adapters.clickhouse.__version__ import version as dbt_clickhouse_version 10 | from dbt.adapters.clickhouse.dbclient import ChClientWrapper, ChRetryableException 11 | from dbt.adapters.clickhouse.util import hide_stack_trace 12 | 13 | 14 | class ChHttpClient(ChClientWrapper): 15 | def query(self, sql, **kwargs): 16 | try: 17 | return self._client.query(sql, **kwargs) 18 | except DatabaseError as ex: 19 | err_msg = hide_stack_trace(ex) 20 | raise DbtDatabaseError(err_msg) from ex 21 | 22 | def command(self, sql, **kwargs): 23 | try: 24 | return self._client.command(sql, **kwargs) 25 | except DatabaseError as ex: 26 | err_msg = hide_stack_trace(ex) 27 | raise DbtDatabaseError(err_msg) from ex 28 | 29 | def columns_in_query(self, sql: str, **kwargs) -> List[ClickHouseColumn]: 30 | try: 31 | query_result = self._client.query( 32 | f"SELECT * FROM ( \n" f"{sql} \n" f") LIMIT 0", 33 | **kwargs, 34 | ) 35 | return [ 36 | ClickHouseColumn.create(name, ch_type.name) 37 | for name, ch_type in zip(query_result.column_names, query_result.column_types) 38 | ] 39 | except DatabaseError as ex: 40 | err_msg = hide_stack_trace(ex) 41 | raise DbtDatabaseError(err_msg) from ex 42 | 43 | def get_ch_setting(self, setting_name): 44 | setting = self._client.server_settings.get(setting_name) 45 | return (setting.value, setting.readonly) if setting else (None, 0) 46 | 47 | def database_dropped(self, database: str): 48 | # This is necessary for the http client to avoid exceptions when ClickHouse doesn't recognize the database 49 | # query parameter 50 | if self.database == database: 51 | self._client.database = None 52 | 53 | def close(self): 54 | self._client.close() 55 | 56 | def _create_client(self, credentials): 57 | try: 58 | return clickhouse_connect.get_client( 59 | host=credentials.host, 60 | port=credentials.port, 61 | username=credentials.user, 62 | password=credentials.password, 63 | interface='https' if credentials.secure else 'http', 64 | compress=False if credentials.compression == '' else bool(credentials.compression), 65 | connect_timeout=credentials.connect_timeout, 66 | send_receive_timeout=credentials.send_receive_timeout, 67 | client_name=f'dbt-adapters/{dbt_adapters_version} dbt-clickhouse/{dbt_clickhouse_version}', 68 | verify=credentials.verify, 69 | client_cert=credentials.client_cert, 70 | client_cert_key=credentials.client_cert_key, 71 | query_limit=0, 72 | settings=self._conn_settings, 73 | ) 74 | except OperationalError as ex: 75 | raise ChRetryableException(str(ex)) from ex 76 | 77 | def _set_client_database(self): 78 | self._client.database = self.database 79 | 80 | def _server_version(self): 81 | return self._client.server_version 82 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/logger.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.events.logging 
import AdapterLogger 2 | 3 | logger = AdapterLogger('dbt_clickhouse') 4 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/nativeclient.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import clickhouse_driver 4 | import pkg_resources 5 | from clickhouse_driver.errors import NetworkError, SocketTimeoutError 6 | from dbt.adapters.__about__ import version as dbt_adapters_version 7 | from dbt_common.exceptions import DbtDatabaseError 8 | 9 | from dbt.adapters.clickhouse import ClickHouseColumn, ClickHouseCredentials 10 | from dbt.adapters.clickhouse.__version__ import version as dbt_clickhouse_version 11 | from dbt.adapters.clickhouse.dbclient import ChClientWrapper, ChRetryableException 12 | from dbt.adapters.clickhouse.logger import logger 13 | from dbt.adapters.clickhouse.util import hide_stack_trace 14 | 15 | try: 16 | driver_version = pkg_resources.get_distribution('clickhouse-driver').version 17 | except pkg_resources.ResolutionError: 18 | driver_version = 'unknown' 19 | 20 | 21 | class ChNativeClient(ChClientWrapper): 22 | def query(self, sql, **kwargs): 23 | try: 24 | return NativeClientResult(self._client.execute(sql, with_column_types=True, **kwargs)) 25 | except clickhouse_driver.errors.Error as ex: 26 | err_msg = hide_stack_trace(ex) 27 | raise DbtDatabaseError(err_msg) from ex 28 | 29 | def command(self, sql, **kwargs): 30 | try: 31 | result = self._client.execute(sql, **kwargs) 32 | if len(result) and len(result[0]): 33 | return result[0][0] 34 | except clickhouse_driver.errors.Error as ex: 35 | err_msg = hide_stack_trace(ex) 36 | raise DbtDatabaseError(err_msg) from ex 37 | 38 | def columns_in_query(self, sql: str, **kwargs) -> List[ClickHouseColumn]: 39 | try: 40 | _, columns = self._client.execute( 41 | f"SELECT * FROM ( \n" f"{sql} \n" f") LIMIT 0", 42 | with_column_types=True, 43 | ) 44 | return [ClickHouseColumn.create(column[0], column[1]) for column in columns] 45 | except clickhouse_driver.errors.Error as ex: 46 | err_msg = hide_stack_trace(ex) 47 | raise DbtDatabaseError(err_msg) from ex 48 | 49 | def get_ch_setting(self, setting_name): 50 | try: 51 | result = self._client.execute( 52 | f"SELECT value, readonly FROM system.settings WHERE name = '{setting_name}'" 53 | ) 54 | except clickhouse_driver.errors.Error as ex: 55 | logger.warn('Unexpected error retrieving ClickHouse server setting', ex) 56 | return None 57 | return (result[0][0], result[0][1]) if result else (None, 0) 58 | 59 | def close(self): 60 | self._client.disconnect() 61 | 62 | def _create_client(self, credentials: ClickHouseCredentials): 63 | client = clickhouse_driver.Client( 64 | host=credentials.host, 65 | port=credentials.port, 66 | user=credentials.user, 67 | password=credentials.password, 68 | client_name=f'dbt-adapters/{dbt_adapters_version} dbt-clickhouse/{dbt_clickhouse_version} clickhouse-driver/{driver_version}', 69 | secure=credentials.secure, 70 | verify=credentials.verify, 71 | certfile=credentials.client_cert, 72 | keyfile=credentials.client_cert_key, 73 | connect_timeout=credentials.connect_timeout, 74 | send_receive_timeout=credentials.send_receive_timeout, 75 | sync_request_timeout=credentials.sync_request_timeout, 76 | compress_block_size=credentials.compress_block_size, 77 | compression=False if credentials.compression == '' else credentials.compression, 78 | tcp_keepalive=credentials.tcp_keepalive, 79 | settings=self._conn_settings, 80 | ) 81 | try: 82 | 
client.connection.connect() 83 | except (SocketTimeoutError, NetworkError) as ex: 84 | raise ChRetryableException(str(ex)) from ex 85 | return client 86 | 87 | def _set_client_database(self): 88 | # After we know the database exists, reconnect to that database if appropriate 89 | if self._client.connection.database != self.database: 90 | self._client.connection.disconnect() 91 | self._client.connection.database = self.database 92 | self._client.connection.connect() 93 | 94 | def _server_version(self): 95 | server_info = self._client.connection.server_info 96 | return ( 97 | f'{server_info.version_major}.{server_info.version_minor}.{server_info.version_patch}' 98 | ) 99 | 100 | 101 | class NativeClientResult: 102 | def __init__(self, native_result): 103 | self.result_set = native_result[0] 104 | self.column_names = [col[0] for col in native_result[1]] 105 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/query.py: -------------------------------------------------------------------------------- 1 | BS = '\\' 2 | must_escape = (BS, '\'', '`') 3 | 4 | 5 | def quote_identifier(identifier: str): 6 | first_char = identifier[0] 7 | if first_char in ('`', '"') and identifier[-1] == first_char: 8 | # Identifier is already quoted, assume that it's valid 9 | return identifier 10 | return f'`{escape_str(identifier)}`' 11 | 12 | 13 | def escape_str(value: str): 14 | return ''.join(f'{BS}{c}' if c in must_escape else c for c in value) 15 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/relation.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Optional, Type 3 | 4 | from dbt.adapters.base.relation import BaseRelation, EventTimeFilter, Path, Policy, Self 5 | from dbt.adapters.contracts.relation import HasQuoting, RelationConfig 6 | from dbt_common.dataclass_schema import StrEnum 7 | from dbt_common.exceptions import DbtRuntimeError 8 | from dbt_common.utils import deep_merge 9 | 10 | from dbt.adapters.clickhouse.query import quote_identifier 11 | 12 | NODE_TYPE_SOURCE = 'source' 13 | 14 | 15 | @dataclass 16 | class ClickHouseQuotePolicy(Policy): 17 | database: bool = True 18 | schema: bool = True 19 | identifier: bool = True 20 | 21 | 22 | @dataclass 23 | class ClickHouseIncludePolicy(Policy): 24 | database: bool = False 25 | schema: bool = True 26 | identifier: bool = True 27 | 28 | 29 | class ClickHouseRelationType(StrEnum): 30 | Table = "table" 31 | View = "view" 32 | CTE = "cte" 33 | MaterializedView = "materialized_view" 34 | External = "external" 35 | Ephemeral = "ephemeral" 36 | Dictionary = "dictionary" 37 | 38 | 39 | @dataclass(frozen=True, eq=False, repr=False) 40 | class ClickHouseRelation(BaseRelation): 41 | type: Optional[ClickHouseRelationType] = None 42 | quote_policy: Policy = field(default_factory=lambda: ClickHouseQuotePolicy()) 43 | include_policy: Policy = field(default_factory=lambda: ClickHouseIncludePolicy()) 44 | quote_character: str = '`' 45 | can_exchange: bool = False 46 | can_on_cluster: bool = False 47 | 48 | def __post_init__(self): 49 | if self.database != self.schema and self.database: 50 | raise DbtRuntimeError(f'Cannot set database {self.database} in clickhouse!') 51 | self.path.database = '' 52 | 53 | def render(self) -> str: 54 | return ".".join(quote_identifier(part) for _, part in self._render_iterator() if part) 55 | 56 | def 
_render_event_time_filtered(self, event_time_filter: EventTimeFilter) -> str: 57 | """ 58 | Returns "" if start and end are both None 59 | """ 60 | filter = "" 61 | if event_time_filter.start and event_time_filter.end: 62 | filter = f"{event_time_filter.field_name} >= '{event_time_filter.start.strftime('%Y-%m-%d %H:%M:%S')}' and {event_time_filter.field_name} < '{event_time_filter.end.strftime('%Y-%m-%d %H:%M:%S')}'" 63 | elif event_time_filter.start: 64 | filter = f"{event_time_filter.field_name} >= '{event_time_filter.start.strftime('%Y-%m-%d %H:%M:%S')}'" 65 | elif event_time_filter.end: 66 | filter = f"{event_time_filter.field_name} < '{event_time_filter.end.strftime('%Y-%m-%d %H:%M:%S')}'" 67 | 68 | return filter 69 | 70 | def derivative(self, suffix: str, relation_type: Optional[str] = None) -> BaseRelation: 71 | path = Path(schema=self.path.schema, database='', identifier=self.path.identifier + suffix) 72 | derivative_type = ClickHouseRelationType(relation_type) if relation_type else self.type 73 | return ClickHouseRelation( 74 | type=derivative_type, path=path, can_on_cluster=self.can_on_cluster 75 | ) 76 | 77 | def matches( 78 | self, 79 | database: Optional[str] = '', 80 | schema: Optional[str] = None, 81 | identifier: Optional[str] = None, 82 | ): 83 | if schema: 84 | raise DbtRuntimeError(f'Passed unexpected schema value {schema} to Relation.matches') 85 | return self.database == database and self.identifier == identifier 86 | 87 | @property 88 | def should_on_cluster(self) -> bool: 89 | if self.include_policy.identifier: 90 | return self.can_on_cluster 91 | else: 92 | # create database/schema on cluster by default 93 | return True 94 | 95 | @classmethod 96 | def get_on_cluster( 97 | cls: Type[Self], 98 | cluster: str = '', 99 | database_engine: str = '', 100 | ) -> bool: 101 | # not using ternary expression for simplicity 102 | if not cluster.strip() or 'replicated' in database_engine.lower(): 103 | return False 104 | else: 105 | return True 106 | 107 | @classmethod 108 | def create_from( 109 | cls: Type[Self], 110 | quoting: HasQuoting, 111 | relation_config: RelationConfig, 112 | **kwargs: Any, 113 | ) -> Self: 114 | quote_policy = kwargs.pop("quote_policy", {}) 115 | 116 | config_quoting = relation_config.quoting_dict 117 | config_quoting.pop("column", None) 118 | # precedence: kwargs quoting > relation config quoting > base quoting > default quoting 119 | quote_policy = deep_merge( 120 | cls.get_default_quote_policy().to_dict(omit_none=True), 121 | quoting.quoting, 122 | config_quoting, 123 | quote_policy, 124 | ) 125 | 126 | # If the database is set, and the source schema is "defaulted" to the source.name, override the 127 | # schema with the database instead, since that's presumably what's intended for clickhouse 128 | schema = relation_config.schema 129 | 130 | can_on_cluster = None 131 | cluster = "" 132 | database_engine = "" 133 | # We placed a hardcoded const (instead of importing it from dbt-core) in order to decouple the packages 134 | if relation_config.resource_type == NODE_TYPE_SOURCE: 135 | if schema == relation_config.source_name and relation_config.database: 136 | schema = relation_config.database 137 | else: 138 | # quoting is only available for non-source nodes 139 | cluster = quoting.credentials.cluster or "" 140 | database_engine = quoting.credentials.database_engine or "" 141 | 142 | if ( 143 | cluster 144 | and str(relation_config.config.get("disable_on_cluster")).lower() != "true" 145 | and 'replicated' not in database_engine.lower() 146 | ): 147 | 
can_on_cluster = True 148 | 149 | return cls.create( 150 | database='', 151 | schema=schema, 152 | identifier=relation_config.identifier, 153 | quote_policy=quote_policy, 154 | can_on_cluster=can_on_cluster, 155 | **kwargs, 156 | ) 157 | -------------------------------------------------------------------------------- /dbt/adapters/clickhouse/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dbt_common.exceptions import DbtRuntimeError 4 | 5 | 6 | def compare_versions(v1: str, v2: str) -> int: 7 | v1_parts = v1.split('.') 8 | v2_parts = v2.split('.') 9 | for part1, part2 in zip(v1_parts, v2_parts): 10 | try: 11 | if int(part1) != int(part2): 12 | return 1 if int(part1) > int(part2) else -1 13 | except ValueError: 14 | raise DbtRuntimeError("Version must consist of only numbers separated by '.'") 15 | return 0 16 | 17 | 18 | def hide_stack_trace(ex: Exception) -> str: 19 | 20 | if not os.getenv("HIDE_STACK_TRACE", ''): 21 | return str(ex).strip() 22 | 23 | err_msg = str(ex).split("Stack trace")[0].strip() 24 | return err_msg 25 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/dbt_project.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: dbt_clickhouse 3 | version: 1.0 4 | config-version: 2 5 | 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__list_schemas(database) %} 2 | {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} 3 | select name from system.databases 4 | {% endcall %} 5 | {{ return(load_result('list_schemas').table) }} 6 | {% endmacro %} 7 | 8 | {% macro clickhouse__create_schema(relation) -%} 9 | {%- call statement('create_schema') -%} 10 | create database if not exists {{ relation.without_identifier().include(database=False) }} 11 | {{ on_cluster_clause(relation)}} 12 | {{ adapter.clickhouse_db_engine_clause() }} 13 | {% endcall %} 14 | {% endmacro %} 15 | 16 | {% macro clickhouse__drop_schema(relation) -%} 17 | {%- call statement('drop_schema') -%} 18 | drop database if exists {{ relation.without_identifier().include(database=False) }} {{ on_cluster_clause(relation)}} 19 | {%- endcall -%} 20 | {% endmacro %} 21 | 22 | {% macro clickhouse__list_relations_without_caching(schema_relation) %} 23 | {% call statement('list_relations_without_caching', fetch_result=True) -%} 24 | select 25 | t.name as name, 26 | t.database as schema, 27 | multiIf( 28 | engine in ('MaterializedView', 'View'), 'view', 29 | engine = 'Dictionary', 'dictionary', 30 | 'table' 31 | ) as type, 32 | db.engine as db_engine, 33 | {%- if adapter.get_clickhouse_cluster_name() -%} 34 | count(distinct _shard_num) > 1 as is_on_cluster 35 | from clusterAllReplicas({{ adapter.get_clickhouse_cluster_name() }}, system.tables) as t 36 | join system.databases as db on t.database = db.name 37 | where schema = '{{ schema_relation.schema }}' 38 | group by name, schema, type, db_engine 39 | {%- else -%} 40 | 0 as is_on_cluster 41 | from system.tables as t join 
system.databases as db on t.database = db.name 42 | where schema = '{{ schema_relation.schema }}' 43 | {% endif %} 44 | 45 | {% endcall %} 46 | {{ return(load_result('list_relations_without_caching').table) }} 47 | {% endmacro %} 48 | 49 | {% macro clickhouse__get_columns_in_relation(relation) -%} 50 | {% call statement('get_columns', fetch_result=True) %} 51 | select name, type from system.columns where table = '{{ relation.identifier }}' 52 | {% if relation.schema %} 53 | and database = '{{ relation.schema }}' 54 | {% endif %} 55 | order by position 56 | {% endcall %} 57 | {{ return(sql_convert_columns_in_relation(load_result('get_columns').table)) }} 58 | {% endmacro %} 59 | 60 | {% macro clickhouse__drop_relation(relation, obj_type='table') -%} 61 | {% call statement('drop_relation', auto_begin=False) -%} 62 | drop {{ obj_type }} if exists {{ relation }} {{ on_cluster_clause(relation, True)}} 63 | {%- endcall %} 64 | {% endmacro %} 65 | 66 | {% macro clickhouse__rename_relation(from_relation, to_relation, obj_type='table') -%} 67 | {% call statement('drop_relation') %} 68 | drop {{ obj_type }} if exists {{ to_relation }} {{ on_cluster_clause(to_relation)}} 69 | {% endcall %} 70 | {% call statement('rename_relation') %} 71 | rename {{ obj_type }} {{ from_relation }} to {{ to_relation }} {{ on_cluster_clause(from_relation)}} 72 | {% endcall %} 73 | {% endmacro %} 74 | 75 | {% macro clickhouse__truncate_relation(relation) -%} 76 | {% call statement('truncate_relation') -%} 77 | truncate table {{ relation }} {{ on_cluster_clause(relation)}} 78 | {%- endcall %} 79 | {% endmacro %} 80 | 81 | {% macro clickhouse__make_temp_relation(base_relation, suffix) %} 82 | {% set tmp_identifier = base_relation.identifier ~ suffix %} 83 | {% set tmp_relation = base_relation.incorporate( 84 | path={"identifier": tmp_identifier, "schema": None}) -%} 85 | {% do return(tmp_relation) %} 86 | {% endmacro %} 87 | 88 | 89 | {% macro clickhouse__generate_database_name(custom_database_name=none, node=none) -%} 90 | {% do return('') %} 91 | {%- endmacro %} 92 | 93 | {% macro clickhouse__get_columns_in_query(select_sql) %} 94 | {% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%} 95 | select * from ( 96 | {{ select_sql }} 97 | ) as __dbt_sbq 98 | limit 0 99 | {% endcall %} 100 | 101 | {{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }} 102 | {% endmacro %} 103 | 104 | {% macro clickhouse__alter_column_type(relation, column_name, new_column_type) -%} 105 | {% call statement('alter_column_type') %} 106 | alter table {{ relation }} {{ on_cluster_clause(relation)}} modify column {{ adapter.quote(column_name) }} {{ new_column_type }} 107 | {% endcall %} 108 | {% endmacro %} 109 | 110 | {% macro exchange_tables_atomic(old_relation, target_relation, obj_types='TABLES') %} 111 | 112 | {%- if adapter.get_clickhouse_cluster_name() is not none and obj_types == 'TABLES' and 'Replicated' in engine_clause() %} 113 | {%- call statement('exchange_table_sync_replica') -%} 114 | SYSTEM SYNC REPLICA {{ on_cluster_clause(target_relation) }} {{ target_relation.schema }}.{{ target_relation.identifier }} 115 | {% endcall %} 116 | {%- endif %} 117 | {%- call statement('exchange_tables_atomic') -%} 118 | EXCHANGE {{ obj_types }} {{ old_relation }} AND {{ target_relation }} {{ on_cluster_clause(target_relation)}} 119 | {% endcall %} 120 | {% endmacro %} 121 | -------------------------------------------------------------------------------- 
/dbt/include/clickhouse/macros/adapters/apply_grants.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__get_show_grant_sql(relation) %} 2 | SELECT access_type as privilege_type, COALESCE(user_name, role_name) as grantee from system.grants where table = '{{ relation.name }}' 3 | AND database = '{{ relation.schema }}' 4 | {%- endmacro %} 5 | 6 | {% macro clickhouse__call_dcl_statements(dcl_statement_list) %} 7 | {% for dcl_statement in dcl_statement_list %} 8 | {% call statement('dcl') %} 9 | {{ dcl_statement }}; 10 | {% endcall %} 11 | {% endfor %} 12 | {% endmacro %} 13 | 14 | 15 | {%- macro clickhouse__get_grant_sql(relation, privilege, grantees) -%} 16 | grant {{ on_cluster_clause(relation)}} {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }} 17 | {%- endmacro -%} 18 | 19 | {%- macro clickhouse__get_revoke_sql(relation, privilege, grantees) -%} 20 | revoke {{ on_cluster_clause(relation)}} {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }} 21 | {%- endmacro -%} 22 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/adapters/relation.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__get_or_create_relation(database, schema, identifier, type) %} 2 | {%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %} 3 | {% if target_relation %} 4 | {% do return([true, target_relation]) %} 5 | {% endif %} 6 | 7 | {%- set can_exchange = adapter.can_exchange(schema, type) %} 8 | {%- set should_on_cluster = adapter.should_on_cluster(config.get('materialized'), engine_clause()) %} 9 | {%- set new_relation = api.Relation.create( 10 | database=None, 11 | schema=schema, 12 | identifier=identifier, 13 | type=type, 14 | can_exchange=can_exchange, 15 | can_on_cluster=should_on_cluster 16 | ) -%} 17 | {% do return([false, new_relation]) %} 18 | {% endmacro %} 19 | 20 | {% macro clickhouse__get_database(database) %} 21 | {% call statement('get_database', fetch_result=True) %} 22 | select name, engine, comment 23 | from system.databases 24 | where name = '{{ database }}' 25 | {% endcall %} 26 | {% do return(load_result('get_database').table) %} 27 | {% endmacro %} -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/catalog.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__get_catalog(information_schema, schemas) -%} 2 | {%- call statement('catalog', fetch_result=True) -%} 3 | select 4 | '' as table_database, 5 | columns.database as table_schema, 6 | columns.table as table_name, 7 | if(tables.engine not in ('MaterializedView', 'View'), 'table', 'view') as table_type, 8 | nullIf(tables.comment, '') as table_comment, 9 | columns.name as column_name, 10 | columns.position as column_index, 11 | columns.type as column_type, 12 | nullIf(columns.comment, '') as column_comment, 13 | null as table_owner 14 | from system.columns as columns 15 | join system.tables as tables on tables.database = columns.database and tables.name = columns.table 16 | where database != 'system' and 17 | ( 18 | {%- for schema in schemas -%} 19 | columns.database = '{{ schema }}' 20 | {%- if not loop.last %} or {% endif -%} 21 | {%- endfor -%} 22 | ) 23 | order by columns.database, columns.table, columns.position 24 | {%- endcall -%} 25 | {{ 
return(load_result('catalog').table) }} 26 | {%- endmacro %} 27 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/column_spec_ddl.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__get_assert_columns_equivalent(sql) -%} 2 | {%- set user_defined_columns = model['columns'] -%} 3 | 4 | {%- if not user_defined_columns -%} 5 | {{ exceptions.raise_contract_error([], []) }} 6 | {%- endif -%} 7 | 8 | {%- set yaml_columns = user_defined_columns.values() -%} 9 | 10 | {%- set sql_file_provided_columns = adapter.get_column_schema_from_query(sql) -%} 11 | {%- set sql_columns = adapter.format_columns(sql_file_provided_columns) -%} 12 | 13 | {%- if sql_columns|length != yaml_columns|length -%} 14 | {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} 15 | {%- endif -%} 16 | 17 | {%- for sql_col in sql_columns -%} 18 | {%- set yaml_col = [] -%} 19 | {%- for this_col in yaml_columns -%} 20 | {%- if this_col['name'] == sql_col['name'] -%} 21 | {%- do yaml_col.append(this_col) -%} 22 | {%- break -%} 23 | {%- endif -%} 24 | {%- endfor -%} 25 | {%- if not yaml_col -%} 26 | {#-- Column with name not found in yaml #} 27 | {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} 28 | {%- endif -%} 29 | {%- if sql_col['data_type'] != yaml_col[0]['data_type'] -%} 30 | {#-- Column data types don't match #} 31 | {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} 32 | {%- endif -%} 33 | {%- endfor -%} 34 | 35 | {% endmacro %} 36 | 37 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/dictionary.sql: -------------------------------------------------------------------------------- 1 | {%- materialization dictionary, adapter='clickhouse' -%} 2 | 3 | {%- set existing_relation = load_cached_relation(this) -%} 4 | {%- set target_relation = this.incorporate(type='dictionary') -%} 5 | {%- set cluster_clause = on_cluster_clause(target_relation) -%} 6 | 7 | {%- set grant_config = config.get('grants') -%} 8 | 9 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 10 | 11 | 12 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 13 | 14 | {# create our new dictionary #} 15 | {% call statement('main') -%} 16 | {{ clickhouse__get_create_dictionary_as_sql(target_relation, cluster_clause, sql) }} 17 | {%- endcall %} 18 | 19 | {% set should_revoke = should_revoke(target_relation, full_refresh_mode=True) %} 20 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 21 | 22 | {% do persist_docs(target_relation, model) %} 23 | 24 | {{ run_hooks(post_hooks, inside_transaction=True) }} 25 | 26 | {{ adapter.commit() }} 27 | 28 | 29 | {{ run_hooks(post_hooks, inside_transaction=False) }} 30 | 31 | {{ return({'relations': [target_relation]}) }} 32 | 33 | {%- endmaterialization -%} 34 | 35 | 36 | {% macro clickhouse__get_create_dictionary_as_sql(relation, cluster_clause, sql) %} 37 | {%- set fields = config.get('fields') -%} 38 | {%- set source_type = config.get('source_type') -%} 39 | 40 | CREATE OR REPLACE DICTIONARY {{ relation }} {{ cluster_clause }} 41 | ( 42 | {%- for (name, data_type) in fields -%} 43 | {{ name }} {{ data_type }}{%- if not loop.last -%},{%- endif -%} 44 | {%- endfor -%} 45 | ) 46 | {{ 
primary_key_clause(label="primary key") }} 47 | SOURCE( 48 | {%- if source_type == 'http' %} 49 | {{ http_source() }} 50 | {% else %} 51 | {{ clickhouse_source(sql) }} 52 | {% endif -%} 53 | ) 54 | LAYOUT({{ config.get('layout') }}) 55 | LIFETIME({{ config.get('lifetime') }}) 56 | {%- if config.get('range') %} 57 | RANGE({{ config.get('range') }}) 58 | {%- endif %} 59 | {% endmacro %} 60 | 61 | 62 | {% macro http_source() %} 63 | HTTP(URL '{{ config.get("url") }}' FORMAT '{{ config.get("format") }}') 64 | {% endmacro %} 65 | 66 | 67 | {% macro clickhouse_source(sql) %} 68 | {%- set credentials = adapter.get_credentials(config.get("connection_overrides", {})) -%} 69 | {%- set table = config.get('table') -%} 70 | CLICKHOUSE( 71 | {% if credentials.get("user") -%} 72 | user '{{ credentials.get("user") }}' 73 | {%- endif %} 74 | {% if credentials.get("password") -%} 75 | password '{{ credentials.get("password") }}' 76 | {%- endif %} 77 | {% if credentials.get("database") -%} 78 | db '{{ credentials.get("database") }}' 79 | {%- endif %} 80 | {%- if table is not none %} 81 | table '{{ table }}' 82 | {% else %} 83 | query "{{ sql }}" 84 | {% endif -%} 85 | ) 86 | {% endmacro %} 87 | 88 | 89 | {% macro drop_dictionary_if_exists(relation, cluster_clause) %} 90 | {% if relation.type != 'dictionary' %} 91 | {{ log(relation ~ ' is not a dictionary; defaulting to drop_relation_if_exists') }} 92 | {{ drop_relation_if_exists(relation) }} 93 | {% else %} 94 | {% call statement('drop_dictionary_if_exists') %} 95 | drop dictionary if exists {{ relation }} {{ cluster_clause }} 96 | {% endcall %} 97 | {% endif %} 98 | {% endmacro %} 99 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/distributed_table.sql: -------------------------------------------------------------------------------- 1 | {% materialization distributed_table, adapter='clickhouse' %} 2 | {% set insert_distributed_sync = run_query("SELECT value FROM system.settings WHERE name = 'insert_distributed_sync'")[0][0] %} 3 | {% if insert_distributed_sync != '1' %} 4 | {% do exceptions.raise_compiler_error('To use distributed materialization setting insert_distributed_sync should be set to 1') %} 5 | {% endif %} 6 | 7 | {%- set local_suffix = adapter.get_clickhouse_local_suffix() -%} 8 | {%- set local_db_prefix = adapter.get_clickhouse_local_db_prefix() -%} 9 | 10 | {%- set existing_relation = load_cached_relation(this) -%} 11 | {%- set target_relation = this.incorporate(type='table') -%} 12 | 13 | {% set on_cluster = on_cluster_clause(target_relation) %} 14 | {% if on_cluster.strip() == '' %} 15 | {% do exceptions.raise_compiler_error('To use distributed materialization cluster setting in dbt profile must be set') %} 16 | {% endif %} 17 | 18 | {% set existing_relation_local = existing_relation.incorporate(path={"identifier": this.identifier + local_suffix, "schema": local_db_prefix + this.schema}) if existing_relation is not none else none %} 19 | {% set target_relation_local = target_relation.incorporate(path={"identifier": this.identifier + local_suffix, "schema": local_db_prefix + this.schema}) if target_relation is not none else none %} 20 | 21 | {%- set backup_relation = none -%} 22 | {%- set preexisting_backup_relation = none -%} 23 | {%- set preexisting_intermediate_relation = none -%} 24 | 25 | {% if existing_relation_local is not none %} 26 | {%- set backup_relation_type = existing_relation_local.type -%} 27 | {%- set backup_relation = 
make_backup_relation(target_relation_local, backup_relation_type) -%} 28 | {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} 29 | {% if not existing_relation.can_exchange %} 30 | {%- set intermediate_relation = make_intermediate_relation(target_relation_local) -%} 31 | {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%} 32 | {% endif %} 33 | {% endif %} 34 | {% set view_relation = default__make_temp_relation(target_relation, '__dbt_tmp') %} 35 | -- drop the temp relations if they exist already in the database 36 | {{ drop_relation_if_exists(preexisting_intermediate_relation) }} 37 | {{ drop_relation_if_exists(preexisting_backup_relation) }} 38 | {{ drop_relation_if_exists(view_relation) }} 39 | 40 | {% set grant_config = config.get('grants') %} 41 | 42 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 43 | 44 | {% call statement('main') %} 45 | {{ create_view_as(view_relation, sql) }} 46 | {% endcall %} 47 | 48 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 49 | 50 | {% if backup_relation is none %} 51 | {{ create_distributed_local_table(target_relation, target_relation_local, view_relation) }} 52 | {% elif existing_relation.can_exchange %} 53 | -- We can do an atomic exchange, so no need for an intermediate 54 | {% call statement('main') -%} 55 | {{ create_empty_table_from_relation(backup_relation, view_relation) }} 56 | {%- endcall %} 57 | {% do exchange_tables_atomic(backup_relation, existing_relation_local) %} 58 | {% else %} 59 | {% do run_query(create_empty_table_from_relation(intermediate_relation, view_relation)) or '' %} 60 | {{ adapter.rename_relation(existing_relation_local, backup_relation) }} 61 | {{ adapter.rename_relation(intermediate_relation, target_relation_local) }} 62 | {% endif %} 63 | {% do run_query(create_distributed_table(target_relation, target_relation_local)) or '' %} 64 | {% do run_query(clickhouse__insert_into(target_relation, sql)) or '' %} 65 | {{ drop_relation_if_exists(view_relation) }} 66 | -- cleanup 67 | {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} 68 | {% do apply_grants(target_relation_local, grant_config, should_revoke=should_revoke) %} 69 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 70 | 71 | {% do persist_docs(target_relation, model) %} 72 | {{ run_hooks(post_hooks, inside_transaction=True) }} 73 | {{ adapter.commit() }} 74 | {{ drop_relation_if_exists(backup_relation) }} 75 | {{ run_hooks(post_hooks, inside_transaction=False) }} 76 | {{ return({'relations': [target_relation]}) }} 77 | 78 | {% endmaterialization %} 79 | 80 | {% macro create_distributed_table(relation, local_relation) %} 81 | {%- set cluster = adapter.get_clickhouse_cluster_name() -%} 82 | {% if cluster is none %} 83 | {% do exceptions.raise_compiler_error('Cluster name should be defined for using distributed materializations, current is None') %} 84 | {% endif %} 85 | 86 | {%- set cluster = cluster[1:-1] -%} 87 | {%- set sharding = config.get('sharding_key') -%} 88 | 89 | create or replace table {{ relation }} {{ on_cluster_clause(relation) }} as {{ local_relation }} 90 | ENGINE = Distributed('{{ cluster}}', '{{ local_relation.schema }}', '{{ local_relation.name }}' 91 | {%- if sharding is not none and sharding.strip() != '' -%} 92 | , {{ sharding }} 93 | {%- else %} 94 | , rand() 95 | {% endif -%} 96 | ) 97 | {% endmacro %} 98 | 99 | {% macro create_empty_table_from_relation(relation, source_relation, sql=none) -%} 100 | {%- 
set sql_header = config.get('sql_header', none) -%} 101 | {%- if sql -%} 102 | {%- set columns = adapter.get_column_schema_from_query(sql) | list -%} 103 | {%- else -%} 104 | {%- set columns = adapter.get_columns_in_relation(source_relation) | list -%} 105 | {%- endif -%} 106 | {%- set col_list = [] -%} 107 | {% for col in columns %} 108 | {{col_list.append(col.name + ' ' + col.data_type) or '' }} 109 | {% endfor %} 110 | {{ sql_header if sql_header is not none }} 111 | 112 | create table {{ relation.include(database=False) }} 113 | {{ on_cluster_clause(relation) }} ( 114 | {{col_list | join(', ')}} 115 | 116 | {% if config.get('projections') %} 117 | {% set projections = config.get('projections') %} 118 | {% for projection in projections %} 119 | , PROJECTION {{ projection.get("name") }} ( 120 | {{ projection.get("query") }} 121 | ) 122 | {% endfor %} 123 | {% endif %} 124 | ) 125 | 126 | {{ engine_clause() }} 127 | {{ order_cols(label="order by") }} 128 | {{ primary_key_clause(label="primary key") }} 129 | {{ partition_cols(label="partition by") }} 130 | {{ ttl_config(label="ttl")}} 131 | {{ adapter.get_model_settings(model, config.get('engine', default='MergeTree')) }} 132 | {%- endmacro %} 133 | 134 | {% macro create_distributed_local_table(distributed_relation, shard_relation, structure_relation, sql_query=none) -%} 135 | {{ drop_relation_if_exists(shard_relation) }} 136 | {{ drop_relation_if_exists(distributed_relation) }} 137 | {{ create_schema(shard_relation) }} 138 | {% do run_query(create_empty_table_from_relation(shard_relation, structure_relation, sql_query)) or '' %} 139 | {% do run_query(create_distributed_table(distributed_relation, shard_relation)) or '' %} 140 | {% if sql_query is not none %} 141 | {% do run_query(clickhouse__insert_into(distributed_relation, sql_query)) or '' %} 142 | {% endif %} 143 | {%- endmacro %} 144 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/incremental/is_incremental.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro is_incremental() %} 3 | {#-- do not run introspective queries in parsing #} 4 | {% if not execute %} 5 | {{ return(False) }} 6 | {% else %} 7 | {% set relation = adapter.get_relation(this.database, this.schema, this.table) %} 8 | {{ return(relation is not none 9 | and relation.type == 'table' 10 | and (model.config.materialized == 'incremental' or model.config.materialized == 'distributed_incremental' ) 11 | and not should_full_refresh()) }} 12 | {% endif %} 13 | {% endmacro %} 14 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/incremental/schema_changes.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__apply_column_changes(column_changes, existing_relation, is_distributed=False) %} 2 | {{ log('Schema changes detected. 
Trying to apply the following changes: ' ~ column_changes) }} 3 | {%- set existing_local = none -%} 4 | {% if is_distributed %} 5 | {%- set local_suffix = adapter.get_clickhouse_local_suffix() -%} 6 | {%- set local_db_prefix = adapter.get_clickhouse_local_db_prefix() -%} 7 | {%- set existing_local = existing_relation.incorporate(path={"identifier": this.identifier + local_suffix, "schema": local_db_prefix + this.schema}) if existing_relation is not none else none -%} 8 | {% endif %} 9 | 10 | {% if column_changes.on_schema_change == 'append_new_columns' %} 11 | {% do clickhouse__add_columns(column_changes.columns_to_add, existing_relation, existing_local, is_distributed) %} 12 | 13 | {% elif column_changes.on_schema_change == 'sync_all_columns' %} 14 | {% do clickhouse__drop_columns(column_changes.columns_to_drop, existing_relation, existing_local, is_distributed) %} 15 | {% do clickhouse__add_columns(column_changes.columns_to_add, existing_relation, existing_local, is_distributed) %} 16 | {% do clickhouse__modify_columns(column_changes.columns_to_modify, existing_relation, existing_local, is_distributed) %} 17 | {% endif %} 18 | 19 | {% endmacro %} 20 | 21 | {% macro clickhouse__add_columns(columns, existing_relation, existing_local=none, is_distributed=False) %} 22 | {% for column in columns %} 23 | {% set alter_action -%} 24 | add column if not exists `{{ column.name }}` {{ column.data_type }} 25 | {%- endset %} 26 | {% do clickhouse__run_alter_table_command(alter_action, existing_relation, existing_local, is_distributed) %} 27 | {% endfor %} 28 | 29 | {% endmacro %} 30 | 31 | {% macro clickhouse__drop_columns(columns, existing_relation, existing_local=none, is_distributed=False) %} 32 | {% for column in columns %} 33 | {% set alter_action -%} 34 | drop column if exists `{{ column.name }}` 35 | {%- endset %} 36 | {% do clickhouse__run_alter_table_command(alter_action, existing_relation, existing_local, is_distributed) %} 37 | {% endfor %} 38 | 39 | {% endmacro %} 40 | 41 | {% macro clickhouse__modify_columns(columns, existing_relation, existing_local=none, is_distributed=False) %} 42 | {% for column in columns %} 43 | {% set alter_action -%} 44 | modify column if exists `{{ column.name }}` {{ column.data_type }} 45 | {%- endset %} 46 | {% do clickhouse__run_alter_table_command(alter_action, existing_relation, existing_local, is_distributed) %} 47 | {% endfor %} 48 | 49 | {% endmacro %} 50 | 51 | {% macro clickhouse__run_alter_table_command(alter_action, existing_relation, existing_local=none, is_distributed=False) %} 52 | {% if is_distributed %} 53 | {% call statement('alter_table') %} 54 | alter table {{ existing_local }} {{ on_cluster_clause(existing_relation) }} {{ alter_action }} 55 | {% endcall %} 56 | {% call statement('alter_table') %} 57 | alter table {{ existing_relation }} {{ on_cluster_clause(existing_relation) }} {{ alter_action }} 58 | {% endcall %} 59 | 60 | {% else %} 61 | {% call statement('alter_table') %} 62 | alter table {{ existing_relation }} {{ alter_action }} 63 | {% endcall %} 64 | {% endif %} 65 | 66 | {% endmacro %} -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/s3.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse_s3source(config_name='', bucket='', path='', fmt='', structure='', 2 | aws_access_key_id='', aws_secret_access_key='', role_arn='', compression='') %} 3 | {% if config_name and not 
config_name.lower().endswith('s3') %} 4 | {{ exceptions.raise_compiler_error("S3 configuration should end with 's3'") }} 5 | {% endif %} 6 | {% set s3config = config.get(config_name, {}) %} 7 | {{ adapter.s3source_clause( 8 | config_name=config_name, 9 | s3_model_config=s3config, 10 | bucket=bucket, 11 | path=path, 12 | fmt=fmt, 13 | structure=structure, 14 | aws_access_key_id=aws_access_key_id, 15 | aws_secret_access_key=aws_secret_access_key, 16 | role_arn=role_arn, 17 | compression=compression) }} 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/seed.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__load_csv_rows(model, agate_table) %} 2 | {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} 3 | {% set data_sql = adapter.get_csv_data(agate_table) %} 4 | 5 | {% if data_sql %} 6 | {% set sql -%} 7 | insert into {{ this.render() }} ({{ cols_sql }}) 8 | {{ adapter.get_model_query_settings(model) }} 9 | format CSV 10 | {{ data_sql }} 11 | {%- endset %} 12 | 13 | {% do adapter.add_query(sql, bindings=agate_table, abridge_sql_log=True) %} 14 | {% endif %} 15 | {% endmacro %} 16 | 17 | {% macro clickhouse__create_csv_table(model, agate_table) %} 18 | {%- set column_override = model['config'].get('column_types', {}) -%} 19 | {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} 20 | 21 | {% set sql %} 22 | create table {{ this.render() }} {{ on_cluster_clause(this) }} ( 23 | {%- for col_name in agate_table.column_names -%} 24 | {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%} 25 | {%- set type = column_override.get(col_name, inferred_type) -%} 26 | {%- set column_name = (col_name | string) -%} 27 | {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%} 28 | {%- endfor -%} 29 | ) 30 | {{ engine_clause() }} 31 | {{ order_cols(label='order by') }} 32 | {{ partition_cols(label='partition by') }} 33 | {% endset %} 34 | 35 | {% call statement('_') -%} 36 | {{ sql }} 37 | {%- endcall %} 38 | 39 | {{ return(sql) }} 40 | {% endmacro %} 41 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/snapshot.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__snapshot_hash_arguments(args) -%} 2 | halfMD5({%- for arg in args -%} 3 | coalesce(cast({{ arg }} as varchar ), '') 4 | {% if not loop.last %} || '|' || {% endif %} 5 | {%- endfor -%}) 6 | {%- endmacro %} 7 | 8 | {% macro clickhouse__post_snapshot(staging_relation) %} 9 | {{ drop_relation_if_exists(staging_relation) }} 10 | {% endmacro %} 11 | 12 | {% macro build_snapshot_staging_table(strategy, sql, target_relation) %} 13 | {% set tmp_relation = make_temp_relation(target_relation) %} 14 | 15 | {% set select = snapshot_staging_table(strategy, sql, target_relation) %} 16 | 17 | {% call statement('build_snapshot_staging_relation') %} 18 | {{ create_table_as(False, tmp_relation, select) }} 19 | {% endcall %} 20 | 21 | {% do return(tmp_relation) %} 22 | {% endmacro %} 23 | 24 | {% macro clickhouse__snapshot_merge_sql(target, source, insert_cols) -%} 25 | {%- set insert_cols_csv = insert_cols | join(', ') -%} 26 | {%- set valid_to_col = adapter.quote('dbt_valid_to') -%} 27 | 28 | {%- set upsert = 
target.derivative('__snapshot_upsert') -%} 29 | {% call statement('create_upsert_relation') %} 30 | create table if not exists {{ upsert }} {{ on_cluster_clause(upsert) }} as {{ target }} 31 | {% endcall %} 32 | 33 | {% call statement('insert_unchanged_data') %} 34 | insert into {{ upsert }} ({{ insert_cols_csv }}) 35 | select {% for column in insert_cols -%} 36 | {{ column }} {%- if not loop.last %}, {%- endif %} 37 | {%- endfor %} 38 | from {{ target }} 39 | where dbt_scd_id not in ( 40 | select {{ source }}.dbt_scd_id from {{ source }} 41 | ) 42 | {% endcall %} 43 | 44 | {% call statement('insert_updated_and_deleted') %} 45 | insert into {{ upsert }} ({{ insert_cols_csv }}) 46 | with updates_and_deletes as ( 47 | select 48 | dbt_scd_id, 49 | dbt_valid_to 50 | from {{ source }} 51 | where dbt_change_type IN ('update', 'delete') 52 | ) 53 | select {% for column in insert_cols %} 54 | {%- if column == valid_to_col -%} 55 | updates_and_deletes.dbt_valid_to as dbt_valid_to 56 | {%- else -%} 57 | target.{{ column }} as {{ column }} 58 | {%- endif %} {%- if not loop.last %}, {%- endif %} 59 | {%- endfor %} 60 | from {{ target }} target 61 | join updates_and_deletes on target.dbt_scd_id = updates_and_deletes.dbt_scd_id; 62 | {% endcall %} 63 | 64 | {% call statement('insert_new') %} 65 | insert into {{ upsert }} ({{ insert_cols_csv }}) 66 | select {% for column in insert_cols -%} 67 | {{ column }} {%- if not loop.last %}, {%- endif %} 68 | {%- endfor %} 69 | from {{ source }} 70 | where {{ source }}.dbt_change_type IN ('insert'); 71 | {% endcall %} 72 | 73 | {% if target.can_exchange %} 74 | {% do exchange_tables_atomic(upsert, target) %} 75 | {% call statement('drop_exchanged_relation') %} 76 | drop table if exists {{ upsert }} {{ on_cluster_clause(upsert) }}; 77 | {% endcall %} 78 | {% else %} 79 | {% call statement('drop_target_relation') %} 80 | drop table if exists {{ target }} {{ on_cluster_clause(target) }}; 81 | {% endcall %} 82 | {% call statement('rename_upsert_relation') %} 83 | rename table {{ upsert }} to {{ target }}; 84 | {% endcall %} 85 | {% endif %} 86 | 87 | {% do return ('select 1') %} 88 | {% endmacro %} 89 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/materializations/view.sql: -------------------------------------------------------------------------------- 1 | {%- materialization view, adapter='clickhouse' -%} 2 | 3 | {%- set existing_relation = load_cached_relation(this) -%} 4 | {%- set target_relation = this.incorporate(type='view') -%} 5 | 6 | {% set grant_config = config.get('grants') %} 7 | 8 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 9 | 10 | -- `BEGIN` happens here: 11 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 12 | 13 | {% if existing_relation is none %} 14 | {{ log('Creating new relation ' + target_relation.name )}} 15 | {% else %} 16 | {{ log('Relation ' + target_relation.name + ' already exists, replacing it' )}} 17 | {% endif %} 18 | 19 | {% call statement('main') -%} 20 | {{ get_create_view_as_sql(target_relation, sql) }} 21 | {%- endcall %} 22 | 23 | -- cleanup 24 | {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} 25 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 26 | 27 | {% do persist_docs(target_relation, model) %} 28 | 29 | {{ run_hooks(post_hooks, inside_transaction=True) }} 30 | 31 | {{ adapter.commit() }} 32 | 33 | {{ run_hooks(post_hooks, inside_transaction=False) }} 34 | 35 | {{ 
return({'relations': [target_relation]}) }} 36 | 37 | {%- endmaterialization -%} 38 | 39 | 40 | {% macro get_sql_security_clause(relation) %} 41 | {% set sql_security = config.get('sql_security') %} 42 | {% if sql_security -%} 43 | {% if sql_security == 'definer' -%} 44 | {%- set definer = config.require('definer') -%} 45 | {% if not definer -%} 46 | {{ exceptions.raise_compiler_error("Invalid config parameter `definer`. No value was provided.") }} 47 | {%- endif %} 48 | DEFINER = {{ definer }} SQL SECURITY DEFINER 49 | {%- elif sql_security == 'invoker' %} 50 | SQL SECURITY INVOKER 51 | {%- else %} 52 | {{ exceptions.raise_compiler_error("Invalid config parameter `sql_security`. Got: `" + sql_security + "`, but only definer | invoker allowed.") }} 53 | {%- endif %} 54 | {%- endif %} 55 | {%- endmacro -%} 56 | 57 | 58 | {% macro clickhouse__create_view_as(relation, sql) -%} 59 | {%- set sql_header = config.get('sql_header', none) -%} 60 | {{ sql_header if sql_header is not none }} 61 | 62 | create or replace view {{ relation.include(database=False) }} {{ on_cluster_clause(relation) }} 63 | {{ get_sql_security_clause(relation) }} 64 | {% set contract_config = config.get('contract') %} 65 | {% if contract_config.enforced %} 66 | {{ get_assert_columns_equivalent(sql) }} 67 | {%- endif %} 68 | as ( 69 | {{ sql }} 70 | {{ adapter.get_model_query_settings(model) }} 71 | ) 72 | {% if model.get('config').get('materialized') == 'view' %} 73 | {{ adapter.get_model_settings(model, config.get('engine', default='MergeTree')) }} 74 | {%- endif %} 75 | 76 | {%- endmacro %} 77 | 78 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/persist_docs.sql: -------------------------------------------------------------------------------- 1 | {% macro one_alter_relation(relation, alter_comments) %} 2 | alter table {{ relation }} {{ on_cluster_clause(relation) }} {{ alter_comments }} 3 | {% endmacro %} 4 | 5 | {% macro one_alter_column_comment(relation, column_name, comment) %} 6 | alter table {{ relation }} {{ on_cluster_clause(relation) }} comment column `{{ column_name }}` '{{ comment }}' 7 | {% endmacro %} 8 | 9 | {% macro clickhouse__alter_relation_comment(relation, comment) %} 10 | alter table {{ relation }} {{ on_cluster_clause(relation) }} modify comment '{{ comment }}' 11 | {% endmacro %} 12 | 13 | {% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) %} 14 | {%- set alter_comments = [] %} 15 | 16 | {%- if for_relation and config.persist_relation_docs() and model.description -%} 17 | {% set escaped_comment = clickhouse_escape_comment(model.description) %} 18 | {% do alter_comments.append("modify comment {comment}".format(comment=escaped_comment)) %} 19 | {%- endif -%} 20 | 21 | {%- if for_columns and config.persist_column_docs() and model.columns -%} 22 | {% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list %} 23 | {% for column_name in model.columns if (column_name in existing_columns) %} 24 | {%- set comment = model.columns[column_name]['description'] -%} 25 | {%- if comment %} 26 | {% set escaped_comment = clickhouse_escape_comment(comment) %} 27 | {% do alter_comments.append("comment column `{column_name}` {comment}".format(column_name=column_name, comment=escaped_comment)) %} 28 | {%- endif %} 29 | {%- endfor -%} 30 | {%- endif -%} 31 | 32 | {%- if alter_comments | length > 0 -%} 33 | {% do run_query(one_alter_relation(relation, alter_comments|join(', '))) %} 34 | {%- 
endif -%} 35 | {% endmacro %} 36 | 37 | {# 38 | By using dollar-quoting like this, users can embed anything they want into their comments 39 | (including nested dollar-quoting), as long as they do not use this exact dollar-quoting 40 | label. It would be nice to just pick a new one but eventually you do have to give up. 41 | #} 42 | {% macro clickhouse_escape_comment(comment) -%} 43 | {% if adapter.is_before_version('21.9.2.17') %} 44 | {% do exceptions.raise_compiler_error('Unsupported ClickHouse version for using heredoc syntax') %} 45 | {% endif %} 46 | {% if comment is not string %} 47 | {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %} 48 | {% endif %} 49 | {%- set magic = '$dbt_comment_literal_block$' -%} 50 | {%- if magic in comment -%} 51 | {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%} 52 | {%- endif -%} 53 | {{ magic }}{{ comment }}{{ magic }} 54 | {%- endmacro %} 55 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/schema_tests/relationships.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__test_relationships(model, column_name, to, field) %} 2 | 3 | with child as ( 4 | select {{ column_name }} as from_field 5 | from {{ model }} 6 | where {{ column_name }} is not null 7 | ), 8 | 9 | parent as ( 10 | select {{ field }} as to_field 11 | from {{ to }} 12 | ) 13 | 14 | select 15 | from_field 16 | 17 | from child 18 | left join parent 19 | on child.from_field = parent.to_field 20 | 21 | where parent.to_field is null 22 | -- end_of_sql 23 | settings join_use_nulls = 1 24 | 25 | {% endmacro %} 26 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/utils/datatypes.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/dbt/include/clickhouse/macros/utils/datatypes.sql -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/utils/timestamps.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__current_timestamp() -%} 2 | now() 3 | {%- endmacro %} 4 | 5 | {% macro clickhouse__snapshot_string_as_time(timestamp) -%} 6 | {%- set result = "toDateTime('" ~ timestamp ~ "')" -%} 7 | {{ return(result) }} 8 | {%- endmacro %} -------------------------------------------------------------------------------- /dbt/include/clickhouse/macros/utils/utils.sql: -------------------------------------------------------------------------------- 1 | {% macro clickhouse__get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%} 2 | {% set main_sql_formatted = clickhouse__place_limit(main_sql, limit) if limit !=None else main_sql%} 3 | select 4 | {{ fail_calc }} as failures, 5 | {{ fail_calc }} {{ warn_if }} as should_warn, 6 | {{ fail_calc }} {{ error_if }} as should_error 7 | from ( 8 | {{ main_sql_formatted }} 9 | ) dbt_internal_test 10 | 11 | {%- endmacro %} 12 | 13 | 14 | -- This macro is designed to add a LIMIT clause to a ClickHouse SQL query while preserving any ClickHouse settings specified in the query. 
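-- Any test query that carries its own SETTINGS clause must mark where the selectable SQL ends with the
-- literal token `-- end_of_sql` (as clickhouse__test_relationships does above); the macro splits on that
-- marker and splices the LIMIT in ahead of the settings section instead of appending it to the end.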
15 | -- When multiple queries are nested, the limit will be attached to the outer query 16 | {% macro clickhouse__place_limit(query, limit) -%} 17 | {% if 'settings' in query.lower()%} 18 | {% if '-- end_of_sql' not in query.lower()%} 19 | {{exceptions.raise_compiler_error("-- end_of_sql must be set when using ClickHouse settings")}} 20 | {% endif %} 21 | {% set split_by_settings_sections = query.split("-- end_of_sql")%} 22 | {% set split_by_settings_sections_with_limit = split_by_settings_sections[-2] + "\n LIMIT " + limit|string + "\n" %} 23 | {% set query_with_limit = "-- end_of_sql".join(split_by_settings_sections[:-2] + [split_by_settings_sections_with_limit, split_by_settings_sections[-1]])%} 24 | {{query_with_limit}} 25 | {% else %} 26 | {{query}} 27 | {{"limit " ~ limit}} 28 | {% endif %} 29 | {%- endmacro %} 30 | 31 | {% macro clickhouse__any_value(expression) -%} 32 | any({{ expression }}) 33 | {%- endmacro %} 34 | 35 | 36 | {% macro clickhouse__bool_or(expression) -%} 37 | max({{ expression }}) > 0 38 | {%- endmacro %} 39 | 40 | 41 | {% macro clickhouse__cast_bool_to_text(field) %} 42 | multiIf({{ field }} > 0, 'true', {{ field }} = 0, 'false', NULL) 43 | {% endmacro %} 44 | 45 | 46 | {% macro clickhouse__hash(field) -%} 47 | lower(hex(MD5(toString({{ field }} )))) 48 | {%- endmacro %} 49 | 50 | 51 | {%- macro clickhouse__last_day(date, datepart) -%} 52 | {{ dbt.dateadd('day', '-1', dbt.dateadd(datepart, '1', dbt.date_trunc(datepart, date)))}} 53 | {%- endmacro -%} 54 | 55 | 56 | {% macro clickhouse__split_part(string_text, delimiter_text, part_number) %} 57 | splitByString({{delimiter_text}}, {{ string_text }})[{{ part_number }}] 58 | {% endmacro %} 59 | 60 | 61 | {% macro clickhouse__replace(field, old_chars, new_chars) %} 62 | replaceAll({{ field }},'{{ old_chars }}','{{ new_chars }}') 63 | {% endmacro %} 64 | 65 | 66 | {% macro clickhouse__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} 67 | {% if order_by_clause and 'order by' == ' '.join(order_by_clause.split()[:2]).lower() -%} 68 | {% set order_by_clause_tokens = order_by_clause.split() %} 69 | {% if ',' in order_by_clause_tokens %} 70 | {{ exceptions.raise_compiler_error( 71 | 'ClickHouse does not support multiple order by fields.') 72 | }} 73 | {%- endif %} 74 | {% set order_by_clause_tokens = order_by_clause_tokens[2:] %} 75 | {% set sort_direction = '' %} 76 | {% if 'desc' in ''.join(order_by_clause_tokens[1:]).lower() %} 77 | {% set sort_direction = 'Reverse' %} 78 | {% endif %} 79 | {% set order_by_field = order_by_clause_tokens[0] %} 80 | 81 | {% set arr = "arrayMap(x -> x.1, array{}Sort(x -> x.2, arrayZip(array_agg({}), array_agg({}))))".format(sort_direction, measure, order_by_field) %} 82 | {% else -%} 83 | {% set arr = "array_agg({})".format(measure) %} 84 | {%- endif %} 85 | 86 | {% if limit_num -%} 87 | arrayStringConcat(arraySlice({{ arr }}, 1, {{ limit_num }}), {{delimiter_text}}) 88 | {% else -%} 89 | arrayStringConcat({{ arr }}, {{delimiter_text}}) 90 | {%- endif %} 91 | {%- endmacro %} 92 | 93 | 94 | {% macro clickhouse__array_construct(inputs, data_type) -%} 95 | {% if inputs|length > 0 %} 96 | [ {{ inputs|join(' , ') }} ] 97 | {% else %} 98 | emptyArray{{data_type}}() 99 | {% endif %} 100 | {%- endmacro %} 101 | 102 | 103 | {% macro clickhouse__array_append(array, new_element) -%} 104 | arrayPushBack({{ array }}, {{ new_element }}) 105 | {% endmacro %} 106 | 107 | 108 | {% macro clickhouse__array_concat(array_1, array_2) -%} 109 | arrayConcat({{ array_1 }}, {{ array_2 }}) 110 | {% 
endmacro %} 111 | 112 | -------------------------------------------------------------------------------- /dbt/include/clickhouse/sample_profiles.yml: -------------------------------------------------------------------------------- 1 | --- 2 | default: 3 | outputs: 4 | 5 | dev: 6 | type: clickhouse 7 | schema: [database name] 8 | host: [host] 9 | 10 | port: [port] 11 | user: [user] 12 | password: [password] 13 | 14 | prod: 15 | type: clickhouse 16 | schema: [database name] 17 | host: [host] 18 | 19 | port: [port] 20 | user: [user] 21 | 22 | target: dev 23 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-core>=1.9.0,<1.10 2 | dbt-adapters>=1.10,<2.0 3 | dbt-tests-adapter>=1.10,<2.0 4 | clickhouse-connect>=0.7.6 5 | clickhouse-driver>=0.2.7 6 | pytest>=7.2.0 7 | pytest-dotenv==0.5.2 8 | black==24.3.0 9 | isort==5.10.1 10 | mypy==0.991 11 | yamllint==1.26.3 12 | types-requests==2.27.29 13 | agate~=1.7.1 14 | requests~=2.27.1 15 | setuptools>=69.2.0 16 | types-setuptools>=69.2.0 -------------------------------------------------------------------------------- /etc/chdbt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/etc/chdbt.png -------------------------------------------------------------------------------- /examples/taxis/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /examples/taxis/README.md: -------------------------------------------------------------------------------- 1 | # Taxis Large Incremental Model Example/Test 2 | 3 | This is an example designed to test large incremental materializations. It builds a `trips_inc` model in the 4 | `taxis_dbt` database that uses randomized keys so that the dataset grows on each subsequent run. 5 | 6 | ## Create the source data 7 | 8 | Use this SQL to create and populate the "source" data from the ClickHouse taxis example dataset. 
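Note that `trips_{0..10}.gz` in the `s3` table function call expands to eleven gzipped TSV files, so expect the initial `INSERT` below to run for several minutes depending on bandwidth.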
9 | 10 | ```sql 11 | 12 | CREATE DATABASE taxis; 13 | 14 | CREATE TABLE taxis.trips ( 15 | trip_id UInt32, 16 | pickup_datetime DateTime, 17 | dropoff_datetime DateTime, 18 | pickup_longitude Nullable(Float64), 19 | pickup_latitude Nullable(Float64), 20 | dropoff_longitude Nullable(Float64), 21 | dropoff_latitude Nullable(Float64), 22 | passenger_count UInt8, 23 | trip_distance Float32, 24 | fare_amount Float32, 25 | extra Float32, 26 | tip_amount Float32, 27 | tolls_amount Float32, 28 | total_amount Float32, 29 | payment_type LowCardinality(String), 30 | pickup_ntaname LowCardinality(String), 31 | dropoff_ntaname LowCardinality(String) 32 | ) 33 | ENGINE = MergeTree 34 | ORDER BY trip_id; 35 | 36 | SET input_format_skip_unknown_fields = 1; 37 | 38 | INSERT INTO taxis.trips 39 | SELECT 40 | trip_id, 41 | pickup_datetime, 42 | dropoff_datetime, 43 | pickup_longitude, 44 | pickup_latitude, 45 | dropoff_longitude, 46 | dropoff_latitude, 47 | passenger_count, 48 | trip_distance, 49 | fare_amount, 50 | extra, 51 | tip_amount, 52 | tolls_amount, 53 | total_amount, 54 | payment_type, 55 | pickup_ntaname, 56 | dropoff_ntaname 57 | FROM s3( 58 | 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..10}.gz', 59 | 'TabSeparatedWithNames' 60 | ); 61 | ``` 62 | 63 | ## Create a dbt profile entry 64 | 65 | Add the following entry to the `profiles.yml` file in `~/.dbt` to create the matching dbt profile: 66 | ```yml 67 | taxis: 68 | outputs: 69 | 70 | dev: 71 | type: clickhouse 72 | threads: 4 73 | host: localhost 74 | port: 8123 75 | user: dbt_test 76 | password: dbt_password 77 | use_lw_deletes: true 78 | schema: taxis_dbt 79 | 80 | target: dev 81 | 82 | ``` 83 | 84 | ## Run the model 85 | 86 | `dbt run` in this directory should execute the model. Each run will create a somewhat larger dataset (by adding 87 | additional random trip_ids). 88 | -------------------------------------------------------------------------------- /examples/taxis/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/examples/taxis/analyses/.gitkeep -------------------------------------------------------------------------------- /examples/taxis/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'taxis' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'taxis' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `model-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 
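# Note: the `taxi_s3` var further down bundles the bucket/format/structure arguments accepted by this
# adapter's S3 helper (see macros/materializations/s3.sql); a model could read it with var('taxi_s3')
# to query the raw S3 files directly instead of the pre-loaded taxis.trips table.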
15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_packages" 26 | 27 | vars: 28 | taxi_s3: 29 | bucket: 'datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi' 30 | fmt: 'TabSeparatedWithNames' 31 | structure: 32 | - trip_id UInt32 33 | - pickup_datetime DateTime 34 | - dropoff_datetime DateTime 35 | - pickup_longitude Nullable(Float64) 36 | - pickup_latitude Nullable(Float64) 37 | - dropoff_longitude Nullable(Float64) 38 | - dropoff_latitude Nullable(Float64) 39 | - passenger_count UInt8 40 | - trip_distance Float32 41 | - fare_amount Float32 42 | - extra Float32 43 | - tip_amount Float32 44 | - tolls_amount Float32 45 | - total_amount Float32 46 | - payment_type LowCardinality(String) 47 | - pickup_ntaname LowCardinality(String) 48 | - dropoff_ntaname LowCardinality(String) 49 | -------------------------------------------------------------------------------- /examples/taxis/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/examples/taxis/macros/.gitkeep -------------------------------------------------------------------------------- /examples/taxis/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: trips_inc 5 | description: NY Taxi dataset from S3 6 | config: 7 | materialized: incremental 8 | order_by: rand_trip_id 9 | unique_key: rand_trip_id 10 | 11 | - name: trips_rand 12 | description: Random indexes to apply to incremental materialization 13 | config: 14 | materialized: incremental 15 | order_by: date_time 16 | unique_key: date_time 17 | incremental_strategy: append 18 | -------------------------------------------------------------------------------- /examples/taxis/models/sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: taxis_source 5 | database: taxis 6 | description: This references the main taxi table as copied from S3 7 | tables: 8 | - database: taxis 9 | name: trips 10 | -------------------------------------------------------------------------------- /examples/taxis/models/trips_inc.sql: -------------------------------------------------------------------------------- 1 | with (select start, end from {{ ref('trips_rand') }} ORDER BY date_time DESC LIMIT 1) as range, 2 | (select count() from {{ ref('trips_rand') }}) as run_num 3 | 4 | select rand() as rand_trip_id, * EXCEPT trip_id, run_num, trip_id as orig_id from {{ source('taxis_source', 'trips') }} 5 | LEFT JOIN numbers(24) as sysnum ON 1 = 1 6 | where bitAnd(orig_id, 1023) between range.1 and range.2 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/taxis/models/trips_rand.sql: -------------------------------------------------------------------------------- 1 | SELECT now64() as date_time, rand() % 512 as start, rand() % (1023 - start) + start as end -------------------------------------------------------------------------------- /examples/taxis/seeds/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/examples/taxis/seeds/.gitkeep -------------------------------------------------------------------------------- /examples/taxis/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/examples/taxis/snapshots/.gitkeep -------------------------------------------------------------------------------- /examples/taxis/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/dbt-clickhouse/6177aeba2a4d40fff77849b40998538230f5d102/examples/taxis/tests/.gitkeep -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | follow_imports = skip 4 | exclude = .eggs|.git|.mypy_cache|.venv|venv|env|venv.*|_build|build|build|dist -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | skip-string-normalization = true 4 | target-version = ['py310', 'py311', 'py312'] 5 | exclude = '(\.eggs|\.git|\.mypy_cache|\.venv|venv|env|_build|build|build|dist|)' 6 | 7 | [tool.isort] 8 | line_length = 100 9 | profile = "black" 10 | use_parentheses = true 11 | skip = '.eggs/,.mypy_cache/,.venv/,venv/,env/' 12 | 13 | [tool.pytest.ini_options] 14 | log_cli = true 15 | log_cli_level = "WARNING" 16 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:.*: ResourceWarning 5 | env_files = 6 | test.env 7 | pythonpath = . 
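# the env_files option above is provided by the pytest-dotenv plugin pinned in dev_requirements.txt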
8 | testpaths = 9 | tests/integration # name per convention -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import re 5 | 6 | from setuptools import find_namespace_packages, setup 7 | 8 | this_directory = os.path.abspath(os.path.dirname(__file__)) 9 | with open(os.path.join(this_directory, 'README.md')) as f: 10 | long_description = f.read() 11 | 12 | 13 | # get this from a separate file 14 | def _dbt_clickhouse_version(): 15 | _version_path = os.path.join(this_directory, 'dbt', 'adapters', 'clickhouse', '__version__.py') 16 | _version_pattern = r'''version\s*=\s*["'](.+)["']''' 17 | with open(_version_path) as f: 18 | match = re.search(_version_pattern, f.read().strip()) 19 | if match is None: 20 | raise ValueError(f'invalid version at {_version_path}') 21 | return match.group(1) 22 | 23 | 24 | package_name = 'dbt-clickhouse' 25 | package_version = _dbt_clickhouse_version() 26 | description = '''The Clickhouse plugin for dbt (data build tool)''' 27 | 28 | dbt_minor_version = '1.9' 29 | 30 | if not package_version.startswith(dbt_minor_version): 31 | raise ValueError( 32 | f'Invalid setup.py: package_version={package_version} must start with ' 33 | f'dbt_version={dbt_minor_version}' 34 | ) 35 | 36 | 37 | setup( 38 | name=package_name, 39 | version=package_version, 40 | description=description, 41 | long_description=long_description, 42 | long_description_content_type='text/markdown', 43 | author='ClickHouse Inc.', 44 | author_email='guy@clickhouse.com', 45 | url='https://github.com/ClickHouse/dbt-clickhouse', 46 | license='MIT', 47 | packages=find_namespace_packages(include=['dbt', 'dbt.*']), 48 | package_data={ 49 | 'dbt': [ 50 | 'include/clickhouse/dbt_project.yml', 51 | 'include/clickhouse/macros/*.sql', 52 | 'include/clickhouse/macros/**/*.sql', 53 | ] 54 | }, 55 | install_requires=[ 56 | f'dbt-core>={dbt_minor_version}', 57 | 'clickhouse-connect>=0.6.22', 58 | 'clickhouse-driver>=0.2.6', 59 | 'setuptools>=0.69', 60 | ], 61 | python_requires=">=3.9", 62 | platforms='any', 63 | classifiers=[ 64 | 'Development Status :: 5 - Production/Stable', 65 | 'License :: OSI Approved :: Apache Software License', 66 | 'Operating System :: Microsoft :: Windows', 67 | 'Operating System :: MacOS :: MacOS X', 68 | 'Operating System :: POSIX :: Linux', 69 | 'Programming Language :: Python :: 3.9', 70 | 'Programming Language :: Python :: 3.10', 71 | 'Programming Language :: Python :: 3.11', 72 | 'Programming Language :: Python :: 3.12', 73 | ], 74 | ) 75 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | os.environ['TZ'] = 'UTC' 5 | time.tzset() 6 | 7 | 8 | # Import the standard integration fixtures as a plugin 9 | # Note: fixtures with session scope need to be local 10 | pytest_plugins = ["dbt.tests.fixtures.project"] 11 | -------------------------------------------------------------------------------- /tests/integration/adapter/aliases/test_aliases.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dbt.tests.adapter.aliases.fixtures import ( 5 | MODELS__ALIAS_IN_PROJECT_SQL, 6 | MODELS__ALIAS_IN_PROJECT_WITH_OVERRIDE_SQL, 7 | MODELS__SCHEMA_YML, 8 | ) 9 | from 
dbt.tests.adapter.aliases.test_aliases import ( 10 | BaseAliasErrors, 11 | BaseAliases, 12 | BaseSameAliasDifferentDatabases, 13 | BaseSameAliasDifferentSchemas, 14 | ) 15 | from dbt.tests.util import relation_from_name, run_dbt 16 | 17 | MODELS__DISTRIBUTED_FOO_ALIAS_SQL = """ 18 | 19 | {{ 20 | config( 21 | alias='foo', 22 | materialized='distributed_table' 23 | ) 24 | }} 25 | 26 | select {{ string_literal(this.name) }} as tablename 27 | 28 | """ 29 | 30 | MODELS__DISTRIBUTED_REF_FOO_ALIAS_SQL = """ 31 | 32 | {{ 33 | config( 34 | materialized='distributed_table' 35 | ) 36 | }} 37 | 38 | with trigger_ref as ( 39 | 40 | -- we should still be able to ref a model by its filepath 41 | select * from {{ ref('foo_alias') }} 42 | 43 | ) 44 | 45 | -- this name should still be the filename 46 | select {{ string_literal(this.name) }} as tablename 47 | 48 | """ 49 | 50 | 51 | class TestAliases(BaseAliases): 52 | pass 53 | 54 | 55 | class TestAliasErrors(BaseAliasErrors): 56 | pass 57 | 58 | 59 | class TestSameAliasDifferentSchemas(BaseSameAliasDifferentSchemas): 60 | pass 61 | 62 | 63 | class TestSameAliasDifferentDatabases(BaseSameAliasDifferentDatabases): 64 | pass 65 | 66 | 67 | class TestDistributedAliases(BaseAliases): 68 | @pytest.fixture(scope="class") 69 | def models(self): 70 | return { 71 | "schema.yml": MODELS__SCHEMA_YML, 72 | "foo_alias.sql": MODELS__DISTRIBUTED_FOO_ALIAS_SQL, 73 | "alias_in_project.sql": MODELS__ALIAS_IN_PROJECT_SQL, 74 | "alias_in_project_with_override.sql": MODELS__ALIAS_IN_PROJECT_WITH_OVERRIDE_SQL, 75 | "ref_foo_alias.sql": MODELS__DISTRIBUTED_REF_FOO_ALIAS_SQL, 76 | } 77 | 78 | @pytest.mark.skipif( 79 | os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' 80 | ) 81 | def test_alias_model_name(self, project): 82 | results = run_dbt(["run"]) 83 | assert len(results) == 4 84 | 85 | cluster = project.test_config['cluster'] 86 | local_relation = relation_from_name(project.adapter, "foo_local") 87 | 88 | result = project.run_sql( 89 | f"select max(tablename) AS tablename From clusterAllReplicas('{cluster}', {local_relation}) ", 90 | fetch="one", 91 | ) 92 | assert result[0] == "foo" 93 | 94 | local_relation = relation_from_name(project.adapter, "ref_foo_alias_local") 95 | result = project.run_sql( 96 | f"select max(tablename) AS tablename From clusterAllReplicas('{cluster}', {local_relation}) ", 97 | fetch="one", 98 | ) 99 | assert result[0] == "ref_foo_alias" 100 | 101 | run_dbt(["test"]) 102 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_adapter_methods.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod 2 | 3 | 4 | class TestBaseAdapterMethod(BaseAdapterMethod): 5 | pass 6 | 7 | 8 | class TestBaseCaching(BaseAdapterMethod): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_base.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations 2 | 3 | 4 | class TestBaseSimpleMaterializations(BaseSimpleMaterializations): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from 
dbt.tests.util import run_dbt 5 | 6 | # CSV content with boolean column type. 7 | seeds_boolean_csv = """ 8 | key,value 9 | abc,true 10 | def,false 11 | hij,true 12 | klm,false 13 | """.lstrip() 14 | 15 | # CSV content with empty fields. 16 | seeds_empty_csv = """ 17 | key,val1,val2,str1 18 | abc,1,1,some_str 19 | abc,1,0,"another string" 20 | def,1,0, 21 | hij,1,1,Caps 22 | hij,1,,"second string" 23 | klm,1,0,"test" 24 | klm,1,,"test4" 25 | """.lstrip() 26 | 27 | seeds_schema_yml = """ 28 | version: 2 29 | 30 | seeds: 31 | - name: empty 32 | config: 33 | column_types: 34 | val2: Nullable(UInt32) 35 | str1: Nullable(String) 36 | settings: 37 | allow_nullable_key: 1 38 | """ 39 | 40 | replicated_seeds_schema_yml = """ 41 | version: 2 42 | 43 | seeds: 44 | - name: empty 45 | config: 46 | engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) 47 | column_types: 48 | val2: Nullable(UInt32) 49 | str1: Nullable(String) 50 | """ 51 | 52 | base_seeds_schema_yml = """ 53 | version: 2 54 | 55 | seeds: 56 | - name: base 57 | config: 58 | engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) 59 | """ 60 | 61 | 62 | class TestCSVSeed: 63 | @pytest.fixture(scope="class") 64 | def seeds(self): 65 | return { 66 | "schema.yml": seeds_schema_yml, 67 | "boolean.csv": seeds_boolean_csv, 68 | "empty.csv": seeds_empty_csv, 69 | } 70 | 71 | def test_seed(self, project): 72 | # seed command 73 | results = run_dbt(["seed"]) 74 | assert len(results) == 2 75 | columns = project.run_sql("DESCRIBE TABLE empty", fetch='all') 76 | assert columns[2][1] == 'Nullable(UInt32)' 77 | assert columns[3][1] == 'Nullable(String)' 78 | 79 | 80 | class TestReplicatedCSVSeed: 81 | @pytest.fixture(scope="class") 82 | def seeds(self): 83 | return { 84 | "schema.yml": replicated_seeds_schema_yml, 85 | "empty.csv": seeds_empty_csv, 86 | } 87 | 88 | @pytest.mark.skipif( 89 | os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' 90 | ) 91 | def test_seed(self, project): 92 | # seed command 93 | results = run_dbt(["seed"]) 94 | assert len(results) == 1 95 | columns = project.run_sql("DESCRIBE TABLE empty", fetch='all') 96 | assert columns[2][1] == 'Nullable(UInt32)' 97 | assert columns[3][1] == 'Nullable(String)' 98 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_empty.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_empty import BaseEmpty 2 | 3 | 4 | class TestEmpty(BaseEmpty): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_ephemeral.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral 2 | 3 | 4 | class TestEphemeral(BaseEphemeral): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_generic_tests.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests 2 | 3 | 4 | class TestGenericTests(BaseGenericTests): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_incremental.py: -------------------------------------------------------------------------------- 1 | import 
pytest 2 | from dbt.tests.adapter.basic.test_incremental import BaseIncremental, BaseIncrementalNotSchemaChange 3 | 4 | 5 | class TestIncremental(BaseIncremental): 6 | pass 7 | 8 | 9 | incremental_not_schema_change_sql = """ 10 | {{ config(materialized="incremental", unique_key="user_id_current_time",on_schema_change="append_new_columns") }} 11 | select 12 | toString(1) || '-' || toString(now64()) as user_id_current_time, 13 | {% if is_incremental() %} 14 | 'thisis18characters' as platform 15 | {% else %} 16 | 'okthisis20characters' as platform 17 | {% endif %} 18 | """ 19 | 20 | 21 | class TestIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): 22 | @pytest.fixture(scope="class") 23 | def models(self): 24 | return {"incremental_not_schema_change.sql": incremental_not_schema_change_sql} 25 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_singular_tests.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests 2 | 3 | 4 | class TestSingularTests(BaseSingularTests): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_singular_tests_ephemeral.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_singular_tests_ephemeral import BaseSingularTestsEphemeral 2 | 3 | 4 | class TestSingularTestsEphemeral(BaseSingularTestsEphemeral): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_snapshot_check_cols.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols 2 | 3 | 4 | class TestSnapshotCheckCols(BaseSnapshotCheckCols): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_snapshot_timestamp.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp 2 | 3 | 4 | class TestSnapshotTimestamp(BaseSnapshotTimestamp): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_table_materialization.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_table_materialization import BaseTableMaterialization 2 | 3 | 4 | class TestTableMat(BaseTableMaterialization): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/basic/test_validate_connection.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection 2 | 3 | 4 | class TestValidateConnection(BaseValidateConnection): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/caching/test_caching.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | model_sql = """ 5 | {{ 6 | config( 7 | materialized='table' 8 | ) 9 | }} 10 | select 1 as id 11 | """ 12 | 13 | 
another_schema_model_sql = """ 14 | {{ 15 | config( 16 | materialized='table', 17 | schema='another_schema' 18 | ) 19 | }} 20 | select 1 as id 21 | """ 22 | 23 | 24 | class BaseCachingTest: 25 | @pytest.fixture(scope="class") 26 | def project_config_update(self): 27 | return { 28 | "config-version": 2, 29 | "quoting": { 30 | "identifier": False, 31 | "schema": False, 32 | }, 33 | } 34 | 35 | def run_and_inspect_cache(self, project, run_args=None): 36 | run_dbt(run_args) 37 | 38 | # the cache was empty at the start of the run. 39 | # the model materialization returned a relation and added to the cache. 40 | adapter = project.adapter 41 | assert len(adapter.cache.relations) == 1 42 | relation = list(adapter.cache.relations).pop() 43 | assert relation.schema == project.test_schema 44 | 45 | # on the second run, dbt will find a relation in the database during cache population. 46 | run_dbt(run_args) 47 | adapter = project.adapter 48 | assert len(adapter.cache.relations) == 1 49 | second_relation = list(adapter.cache.relations).pop() 50 | 51 | for key in ["schema", "identifier"]: 52 | assert getattr(relation, key) == getattr(second_relation, key) 53 | 54 | def test_cache(self, project): 55 | self.run_and_inspect_cache(project, run_args=["run"]) 56 | 57 | 58 | class TestNoPopulateCache(BaseCachingTest): 59 | @pytest.fixture(scope="class") 60 | def models(self): 61 | return { 62 | "model.sql": model_sql, 63 | } 64 | 65 | def test_cache(self, project): 66 | # --no-populate-cache still allows the cache to populate all relations 67 | # under a schema, so the behavior here remains the same as other tests 68 | run_args = ["--no-populate-cache", "run"] 69 | self.run_and_inspect_cache(project, run_args) 70 | 71 | 72 | class TestCachingLowerCaseModel(BaseCachingTest): 73 | @pytest.fixture(scope="class") 74 | def models(self): 75 | return { 76 | "model.sql": model_sql, 77 | } 78 | 79 | 80 | class TestCachingUppercaseModel(BaseCachingTest): 81 | @pytest.fixture(scope="class") 82 | def models(self): 83 | return { 84 | "MODEL.sql": model_sql, 85 | } 86 | 87 | 88 | class TestCachingSelectedSchemaOnly(BaseCachingTest): 89 | @pytest.fixture(scope="class") 90 | def models(self): 91 | return { 92 | "model.sql": model_sql, 93 | "another_schema_model.sql": another_schema_model_sql, 94 | } 95 | 96 | def test_cache(self, project): 97 | # this should only cache the schema containing the selected model 98 | run_args = ["--cache-selected-only", "run", "--select", "model"] 99 | self.run_and_inspect_cache(project, run_args) 100 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_comments.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | from dbt.tests.util import run_dbt 6 | 7 | ref_models__table_comment_sql = """ 8 | {{ 9 | config( 10 | materialized = "table", 11 | persist_docs = {"relation": true, "columns": true}, 12 | ) 13 | }} 14 | 15 | select 16 | 'foo' as first_name, 17 | 'bar' as second_name 18 | 19 | """ 20 | 21 | ref_models__view_comment_sql = """ 22 | {{ 23 | config( 24 | materialized = "view", 25 | persist_docs = {"relation": true, "columns": true}, 26 | ) 27 | }} 28 | 29 | select 30 | 'foo' as first_name, 31 | 'bar' as second_name 32 | 33 | """ 34 | 35 | ref_models__schema_yml = """ 36 | version: 2 37 | 38 | models: 39 | - name: table_comment 40 | description: "YYY table" 41 | columns: 42 | - name: first_name 43 | description: "XXX 
first description" 44 | - name: second_name 45 | description: "XXX second description" 46 | - name: view_comment 47 | description: "YYY view" 48 | columns: 49 | - name: first_name 50 | description: "XXX first description" 51 | - name: second_name 52 | description: "XXX second description" 53 | """ 54 | 55 | 56 | class TestBaseComment: 57 | @pytest.fixture(scope="class") 58 | def models(self): 59 | return { 60 | "schema.yml": ref_models__schema_yml, 61 | "table_comment.sql": ref_models__table_comment_sql, 62 | "view_comment.sql": ref_models__view_comment_sql, 63 | } 64 | 65 | @pytest.mark.parametrize( 66 | 'model_name', 67 | ['table_comment', 'view_comment'], 68 | ) 69 | def test_comment(self, project, model_name): 70 | if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): 71 | pytest.skip('Not running comment test for cloud') 72 | run_dbt(["run"]) 73 | run_dbt(["docs", "generate"]) 74 | with open("target/catalog.json") as fp: 75 | catalog_data = json.load(fp) 76 | 77 | assert "nodes" in catalog_data 78 | column_node = catalog_data["nodes"][f"model.test.{model_name}"] 79 | for column in column_node["columns"].keys(): 80 | column_comment = column_node["columns"][column]["comment"] 81 | assert column_comment.startswith("XXX") 82 | 83 | assert column_node['metadata']['comment'].startswith("YYY") 84 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_errors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | oom_table_sql = """ 5 | SELECT a FROM system.numbers_mt GROUP BY repeat(toString(number), 100000) as a 6 | """ 7 | 8 | schema_yaml = """ 9 | version: 2 10 | 11 | models: 12 | - name: oom_table 13 | description: Table that generates OOM 14 | config: 15 | materialized: table 16 | order_by: a 17 | """ 18 | 19 | 20 | class TestOOMError: 21 | @pytest.fixture(scope="class") 22 | def models(self): 23 | return { 24 | "schema.yml": schema_yaml, 25 | "oom_table.sql": oom_table_sql, 26 | } 27 | 28 | def test_oom(self, project): 29 | res = run_dbt(["run"], expect_pass=False) 30 | assert 'exceeded' in res.results[0].message 31 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_s3.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dbt.tests.util import run_dbt 5 | 6 | testing_s3 = os.environ.get('DBT_CH_TEST_INCLUDE_S3', '').lower() in ('1', 'true', 'yes') 7 | pytestmark = pytest.mark.skipif(not testing_s3, reason='Testing S3 disabled') 8 | 9 | schema_yaml = """ 10 | version: 2 11 | 12 | models: 13 | - name: s3_taxis_source 14 | description: NY Taxi dataset from S3 15 | config: 16 | materialized: table 17 | order_by: pickup_datetime 18 | unique_id: trip_id 19 | taxi_s3: 20 | structure: 21 | - 'trip_id UInt32' 22 | - 'pickup_datetime DateTime' 23 | - name: s3_taxis_inc 24 | """ 25 | 26 | s3_taxis_source = """ 27 | select * from {{ clickhouse_s3source('taxi_s3', path='/trips_4.gz') }} LIMIT 5000 28 | """ 29 | 30 | s3_taxis_full_source = """ 31 | select * from {{ clickhouse_s3source('taxi_s3', path='/trips_5.gz') }} LIMIT 1000 32 | """ 33 | 34 | s3_taxis_inc = """ 35 | {{ config( 36 | materialized='incremental', 37 | order_by='pickup_datetime', 38 | incremental_strategy='delete+insert', 39 | unique_key='trip_id', 40 | 
taxi_s3={"structure":['trip_id UInt32', 'pickup_datetime DateTime', 'passenger_count UInt8']} 41 | ) 42 | }} 43 | 44 | {% if is_incremental() %} 45 | select * from {{ clickhouse_s3source('taxi_s3', path='/trips_4.gz') }} 46 | where pickup_datetime > (SELECT addDays(max(pickup_datetime), -2) FROM s3_taxis_inc) 47 | {% else %} 48 | select trip_id, pickup_datetime, toUInt8(0) as passenger_count from s3_taxis_source 49 | {% endif %} 50 | LIMIT 5000 51 | """ 52 | 53 | 54 | class TestS3: 55 | @pytest.fixture(scope="class") 56 | def project_config_update(self): 57 | return { 58 | 'vars': { 59 | 'taxi_s3': { 60 | 'bucket': 'datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/', 61 | 'fmt': 'TabSeparatedWithNames', 62 | } 63 | } 64 | } 65 | 66 | @pytest.fixture(scope="class") 67 | def models(self): 68 | return { 69 | "s3_taxis_source.sql": s3_taxis_source, 70 | "s3_taxis_inc.sql": s3_taxis_inc, 71 | "schema.yml": schema_yaml, 72 | } 73 | 74 | def test_s3_incremental(self, project): 75 | run_dbt(["run", "--select", "s3_taxis_source.sql"]) 76 | result = project.run_sql("select count() as num_rows from s3_taxis_source", fetch="one") 77 | assert result[0] == 5000 78 | 79 | run_dbt(["run", "--select", "s3_taxis_inc.sql"]) 80 | result = project.run_sql( 81 | "select count(), sum(passenger_count) as num_rows from s3_taxis_inc", fetch="one" 82 | ) 83 | assert result == (5000, 0) 84 | 85 | run_dbt(["run", "--select", "s3_taxis_inc.sql"]) 86 | result = project.run_sql( 87 | "select count(), sum(passenger_count) as num_rows from s3_taxis_inc", fetch="one" 88 | ) 89 | assert 5000 < result[0] < 10000 90 | assert result[1] > 0 91 | 92 | 93 | class TestS3Bucket: 94 | @pytest.fixture(scope="class") 95 | def project_config_update(self): 96 | return { 97 | 'vars': { 98 | 'taxi_s3': { 99 | 'bucket': 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/', 100 | 'fmt': 'TabSeparatedWithNames', 101 | } 102 | } 103 | } 104 | 105 | @pytest.fixture(scope="class") 106 | def models(self): 107 | return { 108 | "s3_taxis_source.sql": s3_taxis_full_source, 109 | "schema.yml": schema_yaml, 110 | } 111 | 112 | def test_read(self, project): 113 | run_dbt(["run", "--select", "s3_taxis_source.sql"]) 114 | result = project.run_sql("select count() as num_rows from s3_taxis_source", fetch="one") 115 | assert result[0] == 1000 116 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_source_schema.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | system_source_yml = """ 5 | version: 2 6 | sources: 7 | - name: system_source 8 | database: system 9 | tables: 10 | - name: tables 11 | database: system 12 | """ 13 | 14 | 15 | class TestSourceSchema: 16 | @pytest.fixture(scope="class") 17 | def models(self): 18 | sys_tables_sql = """ 19 | {{ config(order_by='(database, name)', 20 | engine='MergeTree()', 21 | materialized='table', 22 | settings={'allow_nullable_key': 1}) }} 23 | 24 | select database, name, engine, total_rows from {{ source('system_source', 'tables') }} 25 | """ 26 | return { 27 | "sys_tables.sql": sys_tables_sql, 28 | "sources.yml": system_source_yml, 29 | } 30 | 31 | def test_source_schema(self, project): 32 | results = run_dbt() 33 | assert len(results) > 0 34 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_sql_header.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt_and_capture 3 | 4 | my_model_sql_header_sql = """ 5 | {{ 6 | config( 7 | materialized = "table", 8 | ) 9 | }} 10 | 11 | {% call set_sql_header(config) %} 12 | set log_comment = 'TEST_LOG_COMMENT'; 13 | {%- endcall %} 14 | select getSetting('log_comment') as column_name 15 | """ 16 | 17 | 18 | class TestSQLHeader: 19 | @pytest.fixture(scope="class") 20 | def models(self): 21 | return { 22 | "my_model_sql_header.sql": my_model_sql_header_sql, 23 | } 24 | 25 | def test__sql_header(self, project): 26 | _, log_output = run_dbt_and_capture(["run", "-s", "my_model_sql_header"], expect_pass=False) 27 | 28 | assert 'Multi-statements' in log_output 29 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_table_ttl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from datetime import datetime 4 | 5 | import pytest 6 | from dbt.tests.adapter.basic.files import model_base, schema_base_yml 7 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations 8 | from dbt.tests.util import relation_from_name, run_dbt 9 | 10 | 11 | class TestTableTTL(BaseSimpleMaterializations): 12 | @pytest.fixture(scope="class") 13 | def models(self): 14 | config_materialized_table = """ 15 | {{ config( 16 | order_by='(some_date, id, name)', 17 | engine='MergeTree()', 18 | materialized='table', 19 | settings={'allow_nullable_key': 1}, 20 | ttl='some_date + INTERVAL 5 SECONDS', 21 | query_settings={'allow_nondeterministic_mutations': 1}) 22 | }} 23 | """ 24 | base_table_sql = config_materialized_table + model_base 25 | return { 26 | "table_model.sql": base_table_sql, 27 | "schema.yml": schema_base_yml, 28 | } 29 | 30 | def test_base(self, project): 31 | # seed command 32 | results = run_dbt(["seed"]) 33 | # seed result length 34 | assert len(results) == 1 35 | 36 | # run command 37 | results = run_dbt() 38 | # run result length 39 | assert len(results) == 1 40 | 41 | # base table rowcount 42 | relation = relation_from_name(project.adapter, "table_model") 43 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 44 | # the dates from the seed are too old, so those rows have already expired 45 | assert result[0] == 0 46 | 47 | # insert new data 48 | now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 49 | project.run_sql(f"insert into {relation} (*) values (11, 'Elian', '{now}')") 50 | 51 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 52 | # the newly inserted row has a current timestamp, so it has not expired yet 53 | assert result[0] == 1 54 | 55 | # wait for TTL to expire 56 | time.sleep(6) 57 | 58 | # optimize table 59 | project.run_sql(f"OPTIMIZE TABLE {relation} FINAL") 60 | 61 | # make sure the table is empty 62 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 63 | assert result[0] == 0 64 | 65 | 66 | DISTRIBUTED_TABLE_TTL_MODEL = """ 67 | {{ 68 | config( 69 | order_by='(id)', 70 | engine='MergeTree()', 71 | materialized='distributed_table', 72 | incremental_strategy='append', 73 | ttl='expiration_date + interval 5 seconds', 74 | ) 75 | }} 76 | SELECT 1 AS id, toDateTime('2010-05-20 06:46:51') AS expiration_date 77 | UNION ALL 78 | SELECT 2, toDateTime('2007-09-03 12:31:55') 79 | UNION ALL 80 | SELECT 3, toDateTime('2005-01-01 09:23:15') 81 | """ 82 |
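The `ttl` configs above are passed straight through into the generated `CREATE TABLE` DDL; ClickHouse, not dbt, then expires the rows in the background. A minimal sketch (assuming the same dbt test fixtures these tests use; `assert_ttl_in_ddl` is a hypothetical helper, not part of the suite) of confirming the clause actually lands in the table definition:

```python
from dbt.tests.util import relation_from_name


def assert_ttl_in_ddl(project, relation_name: str) -> None:
    # SHOW CREATE TABLE returns the full DDL, including any TTL clause
    relation = relation_from_name(project.adapter, relation_name)
    ddl = project.run_sql(f"SHOW CREATE TABLE {relation}", fetch="one")[0]
    # ClickHouse normalizes the interval expression (e.g. to toIntervalSecond(5)),
    # so match a stable fragment rather than the exact config string
    assert "TTL" in ddl
```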
83 | 84 | @pytest.mark.skipif( 85 | os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' 86 | ) 87 | class TestDistributedTableTTL: 88 | @pytest.fixture(scope="class") 89 | def models(self): 90 | return { 91 | "id_expire.sql": DISTRIBUTED_TABLE_TTL_MODEL, 92 | } 93 | 94 | def test_base(self, project): 95 | results = run_dbt() 96 | assert len(results) == 1 97 | 98 | relation = relation_from_name(project.adapter, "id_expire") 99 | relation_local = relation_from_name(project.adapter, "id_expire_local") 100 | 101 | # wait for TTL to expire 102 | time.sleep(6) 103 | 104 | project.run_sql(f"OPTIMIZE TABLE {relation_local} FINAL") 105 | 106 | # make sure the distributed table is empty 107 | cnt = project.run_sql(f"select count(*) from {relation}", fetch="all") 108 | assert cnt[0][0] == 0 109 | -------------------------------------------------------------------------------- /tests/integration/adapter/clickhouse/test_clickhouse_upper_case.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | schema_upper_yml = """ 5 | version: 2 6 | sources: 7 | - name: seeds 8 | schema: "{{ target.schema }}" 9 | tables: 10 | - name: seeds_upper 11 | identifier: "{{ var('seed_name', 'seeds_upper') }}" 12 | """ 13 | 14 | 15 | seeds_upper_csv = """ 16 | id,name,some_date 17 | 1,Easton,1981-05-20T06:46:51 18 | 2,Lillian,1978-09-03T18:10:33 19 | 3,Jeremiah,1982-03-11T03:59:51 20 | 4,Nolan,1976-05-06T20:21:35 21 | 5,Hannah,1982-06-23T05:41:26 22 | 6,Eleanor,1991-08-10T23:12:21 23 | 7,Lily,1971-03-29T14:58:02 24 | 8,Jonathan,1988-02-26T02:55:24 25 | 9,Adrian,1994-02-09T13:14:23 26 | 10,Nora,1976-03-01T16:51:39 27 | """.lstrip() 28 | 29 | 30 | class TestUpperCase: 31 | @pytest.fixture(scope="class") 32 | def models(self): 33 | config_table_sql = """ 34 | {{ config(order_by='(some_date, id, name)', engine='MergeTree()', materialized='table', 35 | settings={'allow_nullable_key': 1}) }} 36 | 37 | select * from {{ source('seeds', 'seeds_upper') }} 38 | """ 39 | return { 40 | "UPPER.sql": config_table_sql, 41 | "schema.yml": schema_upper_yml, 42 | } 43 | 44 | @pytest.fixture(scope="class") 45 | def project_config_update(self): 46 | return { 47 | "name": "upper_test", 48 | } 49 | 50 | @pytest.fixture(scope="class") 51 | def seeds(self): 52 | return { 53 | "seeds_upper.csv": seeds_upper_csv, 54 | } 55 | 56 | def test_upper(self, project): 57 | results = run_dbt(["seed"]) 58 | assert len(results) == 1 59 | 60 | results = run_dbt() 61 | assert results[0].node.search_name == 'UPPER' 62 | 63 | results = run_dbt() 64 | assert results[0].node.search_name == 'UPPER' 65 | -------------------------------------------------------------------------------- /tests/integration/adapter/column_types/test_column_types.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.clickhouse import ClickHouseColumn 2 | 3 | 4 | class TestColumn: 5 | def test_base_types(self): 6 | verify_column('name', 'UInt8', False, False, False, True) 7 | verify_column('name', 'UInt16', False, False, False, True) 8 | verify_column('name', 'UInt32', False, False, False, True) 9 | verify_column('name', 'UInt64', False, False, False, True) 10 | verify_column('name', 'UInt128', False, False, False, True) 11 | verify_column('name', 'UInt256', False, False, False, True) 12 | verify_column('name', 'Int8', False, False, False, True) 13 | verify_column('name', 'Int16', False, False, False, True) 14 | verify_column('name',
'Int32', False, False, False, True) 15 | verify_column('name', 'Int64', False, False, False, True) 16 | verify_column('name', 'Int128', False, False, False, True) 17 | verify_column('name', 'Int256', False, False, False, True) 18 | str_col = verify_column('name', 'String', True, False, False, False) 19 | assert str_col.string_size() == 256 20 | fixed_str_col = verify_column('name', 'FixedString', True, False, False, False) 21 | assert fixed_str_col.string_size() == 256 22 | fixed_str_col = verify_column('name', 'FixedString(16)', True, False, False, False) 23 | assert fixed_str_col.string_size() == 16 24 | verify_column('name', 'Decimal(6, 6)', False, True, False, False) 25 | verify_column('name', 'Float32', False, False, True, False) 26 | verify_column('name', 'Float64', False, False, True, False) 27 | verify_column('name', 'Float64', False, False, True, False) 28 | verify_column('name', 'Date', False, False, False, False) 29 | verify_column('name', 'Date32', False, False, False, False) 30 | verify_column('name', "DateTime('Asia/Istanbul')", False, False, False, False) 31 | verify_column('name', "UUID", False, False, False, False) 32 | 33 | def test_array_type(self): 34 | # Test Array of Strings type 35 | col = ClickHouseColumn(column='name', dtype='Array(String)') 36 | verify_column_types(col, False, False, False, False) 37 | assert repr(col) == '<ClickhouseColumn name (Array(String), is nullable: False)>'  # assumed repr format 38 | 39 | # Test Array of Nullable Strings type 40 | col = ClickHouseColumn(column='name', dtype='Array(Nullable(String))') 41 | verify_column_types(col, False, False, False, False) 42 | assert repr(col) == '<ClickhouseColumn name (Array(Nullable(String)), is nullable: False)>' 43 | 44 | # Test Array of Nullable FixedStrings type 45 | col = ClickHouseColumn(column='name', dtype='Array(Nullable(FixedString(16)))') 46 | verify_column_types(col, False, False, False, False) 47 | assert ( 48 | repr(col) 49 | == '<ClickhouseColumn name (Array(Nullable(FixedString(16))), is nullable: False)>' 50 | ) 51 | 52 | def test_low_cardinality_nullable_type(self): 53 | col = ClickHouseColumn(column='name', dtype='LowCardinality(Nullable(String))') 54 | verify_column_types(col, True, False, False, False) 55 | assert ( 56 | repr(col) 57 | == '<ClickhouseColumn name (String, is nullable: True)>' 58 | ) 59 | col = ClickHouseColumn(column='name', dtype='LowCardinality(Nullable(FixedString(16)))') 60 | verify_column_types(col, True, False, False, False) 61 | assert ( 62 | repr(col) 63 | == '<ClickhouseColumn name (String, is nullable: True)>' 64 | ) 65 | 66 | def test_map_type(self): 67 | col = ClickHouseColumn(column='name', dtype='Map(String, UInt64)') 68 | verify_column_types(col, False, False, False, False) 69 | assert repr(col) == '<ClickhouseColumn name (Map(String, UInt64), is nullable: False)>' 70 | col = ClickHouseColumn(column='name', dtype='Map(String, Decimal(6, 6))') 71 | verify_column_types(col, False, False, False, False) 72 | assert ( 73 | repr(col) == '<ClickhouseColumn name (Map(String, Decimal(6, 6)), is nullable: False)>' 74 | ) 75 | 76 | 77 | def verify_column( 78 | name: str, dtype: str, is_string: bool, is_numeric: bool, is_float: bool, is_int: bool 79 | ) -> ClickHouseColumn: 80 | data_type = 'String' if is_string else dtype 81 | col = ClickHouseColumn(column=name, dtype=dtype) 82 | verify_column_types(col, is_string, is_numeric, is_float, is_int) 83 | assert repr(col) == f'<ClickhouseColumn {name} ({data_type}, is nullable: False)>'  # assumed repr format 84 | 85 | # Test Nullable dtype.
86 | nullable_col = ClickHouseColumn(column=name, dtype=f'Nullable({dtype})') 87 | verify_column_types(nullable_col, is_string, is_numeric, is_float, is_int) 88 | assert ( 89 | repr(nullable_col) 90 | == f'<ClickhouseColumn {name} ({data_type}, is nullable: True)>' 91 | ) 92 | 93 | # Test low cardinality dtype 94 | low_cardinality_col = ClickHouseColumn(column=name, dtype=f'LowCardinality({dtype})') 95 | verify_column_types(low_cardinality_col, is_string, is_numeric, is_float, is_int) 96 | assert ( 97 | repr(low_cardinality_col) 98 | == f'<ClickhouseColumn {name} ({data_type}, is nullable: False)>' 99 | ) 100 | return col 101 | 102 | 103 | def verify_column_types( 104 | col: ClickHouseColumn, is_string: bool, is_numeric: bool, is_float: bool, is_int: bool 105 | ): 106 | assert col.is_string() == is_string 107 | assert col.is_numeric() == is_numeric 108 | assert col.is_float() == is_float 109 | assert col.is_integer() == is_int 110 | -------------------------------------------------------------------------------- /tests/integration/adapter/concurrency/test_concurrency.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.concurrency.test_concurrency import BaseConcurrency, seeds__update_csv 2 | from dbt.tests.util import ( 3 | check_relations_equal, 4 | check_table_does_not_exist, 5 | rm_file, 6 | run_dbt, 7 | run_dbt_and_capture, 8 | write_file, 9 | ) 10 | 11 | 12 | class TestConcurrency(BaseConcurrency): 13 | def test_clickhouse_concurrency(self, project): 14 | run_dbt(["seed", "--select", "seed"]) 15 | results, output = run_dbt_and_capture(["run"], expect_pass=False) 16 | self._validate_results(project, results, output) 17 | 18 | rm_file(project.project_root, "seeds", "seed.csv") 19 | write_file(seeds__update_csv, project.project_root, "seeds", "seed.csv") 20 | 21 | results, output = run_dbt_and_capture(["run"], expect_pass=False) 22 | 23 | self._validate_results(project, results, output) 24 | 25 | def _validate_results(self, project, results, output): 26 | assert len(results) == 7 27 | check_relations_equal(project.adapter, ["seed", "view_model"]) 28 | check_relations_equal(project.adapter, ["seed", "dep"]) 29 | check_relations_equal(project.adapter, ["seed", "table_a"]) 30 | check_relations_equal(project.adapter, ["seed", "table_b"]) 31 | check_table_does_not_exist(project.adapter, "invalid") 32 | check_table_does_not_exist(project.adapter, "skip") 33 | assert "PASS=5 WARN=0 ERROR=1 SKIP=1 TOTAL=7" in output 34 | -------------------------------------------------------------------------------- /tests/integration/adapter/constraints/fixtures_constraints.py: -------------------------------------------------------------------------------- 1 | contract_model_schema_yml = """ 2 | version: 2 3 | models: 4 | - name: my_model 5 | config: 6 | contract: 7 | enforced: true 8 | columns: 9 | - name: id 10 | data_type: Int32 11 | description: hello 12 | - name: color 13 | data_type: String 14 | - name: date_day 15 | data_type: Date 16 | - name: my_model_error 17 | config: 18 | contract: 19 | enforced: true 20 | columns: 21 | - name: id 22 | data_type: Int32 23 | description: hello 24 | tests: 25 | - unique 26 | - name: color 27 | data_type: String 28 | - name: date_day 29 | data_type: Date 30 | - name: my_model_wrong_order 31 | config: 32 | contract: 33 | enforced: true 34 | columns: 35 | - name: id 36 | data_type: UInt32 37 | description: hello 38 | tests: 39 | - unique 40 | - name: color 41 | data_type: String 42 | - name: date_day 43 | data_type: Date 44 | - name: my_model_wrong_name 45 | config: 46 | contract: 47 | enforced: true 48 | columns: 49 | -
name: id 50 | data_type: Int32 51 | description: hello 52 | - name: color 53 | data_type: String 54 | - name: date_day 55 | data_type: Date 56 | """ 57 | 58 | 59 | # model columns in a different order to schema definitions 60 | my_model_wrong_order_sql = """ 61 | {{ 62 | config( 63 | materialized = "table" 64 | ) 65 | }} 66 | 67 | select 68 | 'blue' as color, 69 | 1::UInt32 as id, 70 | toDate('2019-01-01') as date_day 71 | """ 72 | 73 | 74 | # model columns name different to schema definitions 75 | my_model_wrong_name_sql = """ 76 | {{ 77 | config( 78 | materialized = "table" 79 | ) 80 | }} 81 | 82 | select 83 | 'blue' as color, 84 | 1 as error, 85 | '2019-01-01' as date_day 86 | """ 87 | 88 | 89 | my_model_data_type_sql = """ 90 | {{{{ 91 | config( 92 | materialized = "table" 93 | ) 94 | }}}} 95 | 96 | select 97 | {sql_value} as wrong_data_type_column_name 98 | """ 99 | 100 | 101 | model_data_type_schema_yml = """ 102 | version: 2 103 | models: 104 | - name: my_model_data_type 105 | config: 106 | contract: 107 | enforced: true 108 | columns: 109 | - name: wrong_data_type_column_name 110 | data_type: {data_type} 111 | """ 112 | 113 | my_model_view_wrong_name_sql = """ 114 | {{ 115 | config( 116 | materialized = "view" 117 | ) 118 | }} 119 | 120 | select 121 | 'blue' as color, 122 | 1 as error, 123 | toDate('2019-01-01') as date_day 124 | """ 125 | 126 | my_model_view_wrong_order_sql = """ 127 | {{ 128 | config( 129 | materialized = "view" 130 | ) 131 | }} 132 | 133 | select 134 | 'blue' as color, 135 | 1::UInt32 as id, 136 | toDate('2019-01-01') as date_day 137 | """ 138 | 139 | 140 | my_model_incremental_wrong_order_sql = """ 141 | {{ 142 | config( 143 | materialized = "incremental", 144 | on_schema_change='append_new_columns' 145 | ) 146 | }} 147 | 148 | select 149 | 'blue' as color, 150 | 1::UInt32 as id, 151 | toDate('2019-01-01') as date_day 152 | """ 153 | 154 | my_model_incremental_wrong_name_sql = """ 155 | {{ 156 | config( 157 | materialized = "incremental", 158 | on_schema_change='append_new_columns' 159 | ) 160 | }} 161 | 162 | select 163 | 'blue' as color, 164 | 1 as error, 165 | '2019-01-01' as date_day 166 | """ 167 | 168 | constraint_model_schema_yml = """ 169 | version: 2 170 | models: 171 | - name: bad_column_constraint_model 172 | materialized: table 173 | config: 174 | contract: 175 | enforced: true 176 | columns: 177 | - name: id 178 | data_type: Int32 179 | constraints: 180 | - type: check 181 | expression: '> 0' 182 | - name: color 183 | data_type: String 184 | - name: date_day 185 | data_type: Date 186 | - name: bad_foreign_key_model 187 | config: 188 | contract: 189 | enforced: true 190 | constraints: 191 | - type: foreign_key 192 | columns: [ id ] 193 | expression: 'foreign_key_model (id)' 194 | columns: 195 | - name: id 196 | data_type: Int32 197 | - name: check_constraints_model 198 | config: 199 | contract: 200 | enforced: true 201 | constraints: 202 | - type: check 203 | name: valid_id 204 | expression: 'id > 100 and id < 200' 205 | columns: 206 | - name: id 207 | data_type: Int32 208 | - name: color 209 | data_type: String 210 | - name: date_day 211 | data_type: Date 212 | """ 213 | 214 | bad_column_constraint_model_sql = """ 215 | {{ 216 | config( 217 | materialized = "table" 218 | ) 219 | }} 220 | 221 | SELECT 5::Int32 as id, 'black' as color, toDate('2023-01-01') as date_day 222 | """ 223 | 224 | bad_foreign_key_model_sql = """ 225 | {{ 226 | config( 227 | materialized = "table" 228 | ) 229 | }} 230 | 231 | SELECT 1::Int32 as id 232 | """ 233 | 234 | 
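The `valid_id` constraint declared in `constraint_model_schema_yml` above is rendered as a ClickHouse `CONSTRAINT ... CHECK` clause, which is enforced at insert time rather than at query time. A minimal sketch (a hypothetical check, not part of these fixtures) of how a violating insert surfaces once `check_constraints_model` has been built:

```python
import pytest


def assert_check_constraint_enforced(project):
    # id = 150 satisfies `id > 100 and id < 200`, so the insert is accepted
    project.run_sql(
        "insert into check_constraints_model (color, id, date_day) values ('red', 150, '2019-01-02')"
    )
    # id = 1 violates the constraint; ClickHouse rejects the whole insert
    with pytest.raises(Exception, match="violated"):
        project.run_sql(
            "insert into check_constraints_model (color, id, date_day) values ('red', 1, '2019-01-02')"
        )
```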
check_constraints_model_sql = """ 235 | {{ 236 | config( 237 | materialized = "table", 238 | ) 239 | }} 240 | 241 | select 242 | 'blue' as color, 243 | 101::Int32 as id, 244 | toDate('2019-01-01') as date_day 245 | """ 246 | 247 | check_constraints_model_fail_sql = """ 248 | {{ 249 | config( 250 | materialized = "table", 251 | ) 252 | }} 253 | 254 | select 255 | 'blue' as color, 256 | 1::Int32 as id, 257 | toDate('2019-01-01') as date_day 258 | """ 259 | 260 | custom_constraint_model_schema_yml = """ 261 | version: 2 262 | models: 263 | - name: custom_column_constraint_model 264 | materialized: table 265 | config: 266 | contract: 267 | enforced: true 268 | columns: 269 | - name: id 270 | data_type: Int32 271 | codec: ZSTD 272 | - name: ts 273 | data_type: timestamp 274 | - name: col_ttl 275 | data_type: String 276 | ttl: ts + INTERVAL 1 DAY 277 | """ 278 | 279 | check_custom_constraints_model_sql = """ 280 | {{ 281 | config( 282 | materialized = "table", 283 | ) 284 | }} 285 | 286 | select 287 | 101::Int32 as id, 288 | timestamp('2025-04-16') as ts, 289 | 'blue' as col_ttl 290 | """ 291 | -------------------------------------------------------------------------------- /tests/integration/adapter/dbt_clone/test_dbt_clone.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible 3 | 4 | 5 | @pytest.mark.skip("clone not supported") 6 | class TestBaseClonePossible(BaseClonePossible): 7 | pass 8 | -------------------------------------------------------------------------------- /tests/integration/adapter/dbt_debug/test_dbt_debug.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from dbt.tests.adapter.dbt_debug.test_dbt_debug import BaseDebug 4 | from dbt.tests.util import run_dbt 5 | 6 | 7 | class TestDebugClickHouse(BaseDebug): 8 | def test_ok(self, project): 9 | run_dbt(["debug"]) 10 | assert "ERROR" not in self.capsys.readouterr().out 11 | 12 | def test_nopass(self, project): 13 | run_dbt(["debug", "--target", "nopass"], expect_pass=False) 14 | self.assertGotValue(re.compile(r"\s+profiles\.yml file"), "ERROR invalid") 15 | 16 | def test_wronguser(self, project): 17 | run_dbt(["debug", "--target", "wronguser"], expect_pass=False) 18 | self.assertGotValue(re.compile(r"\s+Connection test"), "ERROR") 19 | 20 | def test_empty_target(self, project): 21 | run_dbt(["debug", "--target", "none_target"], expect_pass=False) 22 | self.assertGotValue(re.compile(r"\s+output 'none_target'"), "misconfigured") 23 | -------------------------------------------------------------------------------- /tests/integration/adapter/dbt_show/test_dbt_show.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.dbt_show.test_dbt_show import BaseShowLimit, BaseShowSqlHeader 2 | 3 | 4 | class TestShowLimit(BaseShowLimit): 5 | pass 6 | 7 | 8 | class TestShowSqlHeader(BaseShowSqlHeader): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/integration/adapter/dictionary/test_dictionary.py: -------------------------------------------------------------------------------- 1 | """ 2 | test dictionary support in dbt-clickhouse 3 | """ 4 | 5 | import json 6 | import os 7 | 8 | import pytest 9 | from dbt.tests.util import run_dbt 10 | 11 | testing_s3 = os.environ.get('DBT_CH_TEST_INCLUDE_S3', '').lower() in ('1', 'true', 'yes') 12 | 13 | 14 | 
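Dictionary models are compiled into `CREATE DICTIONARY` statements, so the result can be read either like a table (as most of the tests below do) or point-wise with `dictGet`. A minimal sketch (hypothetical helper; assumes the `hackers` dictionary defined below has been built) of a `dictGet` lookup against its `COMPLEX_KEY_HASHED` layout, which takes the key as a tuple:

```python
def lookup_hacker_alias(project, person_id: int) -> str:
    # complex-key layouts are addressed with a tuple of key columns
    row = project.run_sql(
        f"select dictGet('{project.test_schema}.hackers', 'hacker_alias', tuple(toInt32({person_id})))",
        fetch="one",
    )
    return row[0]
```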
PEOPLE_SEED_CSV = """ 15 | id,name,age,department 16 | 1231,Dade,33,engineering 17 | 6666,Ksenia,48,engineering 18 | 8888,Kate,50,engineering 19 | """.lstrip() 20 | 21 | # This model is parameterized, in a way, by the "run_type" dbt project variable 22 | # This is to be able to switch between different model definitions within 23 | # the same test run and allow us to test the evolution of a dictionary 24 | HACKERS_MODEL = """ 25 | {{ config( 26 | materialized='dictionary', 27 | fields=[ 28 | ('id', 'Int32'), 29 | ('name', 'String'), 30 | ('hacker_alias', 'String') 31 | ], 32 | primary_key='id', 33 | layout='COMPLEX_KEY_HASHED()', 34 | lifetime='1', 35 | source_type='clickhouse', 36 | ) }} 37 | 38 | {% if var('run_type', '') == '' %} 39 | select 40 | id, 41 | name, 42 | case 43 | when name like 'Dade' then 'crash_override' 44 | when name like 'Kate' then 'acid burn' 45 | when name like 'Eugene' then 'the plague' 46 | else 'N/A' 47 | end as hacker_alias 48 | from {{ source('raw', 'people') }} 49 | 50 | {% else %} 51 | 52 | select 53 | id, 54 | name, 55 | case 56 | -- Dade wasn't always known as 'crash override'! 57 | when name like 'Dade' and age = 11 then 'zero cool' 58 | when name like 'Dade' and age != 11 then 'crash override' 59 | when name like 'Kate' then 'acid burn' 60 | when name like 'Eugene' then 'the plague' 61 | else 'N/A' 62 | end as hacker_alias 63 | from {{ source('raw', 'people') }} 64 | {% endif %} 65 | """ 66 | 67 | 68 | TAXI_ZONE_DICTIONARY = """ 69 | {{ config( 70 | materialized='dictionary', 71 | fields=[ 72 | ('LocationID', 'UInt16 DEFAULT 0'), 73 | ('Borough', 'String'), 74 | ('Zone', 'String'), 75 | ('service_zone', 'String'), 76 | ], 77 | primary_key='LocationID', 78 | layout='HASHED()', 79 | lifetime='MIN 0 MAX 0', 80 | source_type='http', 81 | url='https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/taxi_zone_lookup.csv', 82 | format='CSVWithNames' 83 | ) }} 84 | 85 | select 1 86 | """ 87 | 88 | 89 | PEOPLE_DICT_MODEL = """ 90 | {{ config( 91 | materialized='dictionary', 92 | fields=[ 93 | ('id', 'Int32'), 94 | ('name', 'String'), 95 | ], 96 | primary_key='id', 97 | layout='HASHED()', 98 | lifetime='1', 99 | source_type='clickhouse', 100 | table='people' 101 | ) }} 102 | 103 | select 1 104 | """ 105 | 106 | 107 | SEED_SCHEMA_YML = """ 108 | version: 2 109 | 110 | sources: 111 | - name: raw 112 | schema: "{{ target.schema }}" 113 | tables: 114 | - name: people 115 | """ 116 | 117 | RANGE_DICTIONARY = """ 118 | {{ config( 119 | materialized='dictionary', 120 | fields=[ 121 | ('id', 'UInt8'), 122 | ('start', 'UInt8'), 123 | ('stop', 'UInt8'), 124 | ('value', 'String') 125 | ], 126 | primary_key='id', 127 | layout='RANGE_HASHED()', 128 | lifetime='MIN 0 MAX 0', 129 | source_type='clickhouse', 130 | range='min start max stop' 131 | ) }} 132 | 133 | select 134 | c1 as id, 135 | c2 as start, 136 | c3 as stop, 137 | c4 as value 138 | from values( 139 | (0, 0, 2, 'foo'), 140 | (0, 3, 5, 'bar') 141 | ) 142 | """ 143 | 144 | 145 | class TestQueryDictionary: 146 | @pytest.fixture(scope="class") 147 | def seeds(self): 148 | return { 149 | "people.csv": PEOPLE_SEED_CSV, 150 | "schema.yml": SEED_SCHEMA_YML, 151 | } 152 | 153 | @pytest.fixture(scope="class") 154 | def models(self): 155 | return { 156 | "hackers.sql": HACKERS_MODEL, 157 | } 158 | 159 | def test_create_and_update(self, project): 160 | run_dbt(["seed"]) 161 | 162 | result = project.run_sql("DESCRIBE TABLE people", fetch="all") 163 | assert result[0][1] == "Int32" 164 | 165 | run_dbt() 166 |
result = project.run_sql("select count(distinct id) from hackers", fetch="all") 167 | assert result[0][0] == 3 168 | 169 | # insert some data and make sure it reaches the target dictionary 170 | project.run_sql( 171 | """ 172 | insert into people ("id", "name", "age", "department") 173 | values (1232,'Dade',11,'engineering'), (9999,'Eugene',40,'malware'); 174 | """ 175 | ) 176 | # force the dictionary to be rebuilt to include the new records in `people` 177 | project.run_sql("system reload dictionary hackers") 178 | result = project.run_sql("select count(distinct id) from hackers", fetch="all") 179 | assert result[0][0] == 5 180 | 181 | # re-run dbt but this time with the new dictionary SQL 182 | run_vars = {"run_type": "extended_schema"} 183 | run_dbt(["run", "--vars", json.dumps(run_vars)]) 184 | results = project.run_sql("select distinct hacker_alias from hackers", fetch="all") 185 | names = set(i[0] for i in results) 186 | assert names == set(["zero cool", "crash override", "acid burn", "the plague", "N/A"]) 187 | 188 | 189 | class TestTableDictionary: 190 | @pytest.fixture(scope="class") 191 | def seeds(self): 192 | return { 193 | "people.csv": PEOPLE_SEED_CSV, 194 | "schema.yml": SEED_SCHEMA_YML, 195 | } 196 | 197 | @pytest.fixture(scope="class") 198 | def models(self): 199 | return {"people_dict.sql": PEOPLE_DICT_MODEL} 200 | 201 | def test_create(self, project): 202 | run_dbt(["seed"]) 203 | run_dbt() 204 | 205 | results = project.run_sql("select distinct name from people_dict", fetch="all") 206 | names = set(i[0] for i in results) 207 | assert names == set(["Dade", "Kate", "Ksenia"]) 208 | 209 | 210 | class TestHttpDictionary: 211 | @pytest.fixture(scope="class") 212 | def models(self): 213 | return {"taxi_zone_dictionary.sql": TAXI_ZONE_DICTIONARY} 214 | 215 | @pytest.mark.skipif(not testing_s3, reason='Testing S3 disabled') 216 | def test_create(self, project): 217 | run_dbt() 218 | 219 | results = project.run_sql( 220 | "select count(distinct LocationID) from taxi_zone_dictionary", fetch="all" 221 | ) 222 | assert results[0][0] == 265 223 | 224 | 225 | class TestRangeDictionary: 226 | @pytest.fixture(scope="class") 227 | def models(self): 228 | return {"range_dictionary.sql": RANGE_DICTIONARY} 229 | 230 | def test_create(self, project): 231 | run_dbt() 232 | 233 | results = project.run_sql("select dictGet(range_dictionary, 'value', 0, 1)", fetch="all") 234 | assert results[0][0] == "foo" 235 | results = project.run_sql("select dictGet(range_dictionary, 'value', 0, 5)", fetch="all") 236 | assert results[0][0] == "bar" 237 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_distributed_grants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants 5 | from dbt.tests.util import get_manifest, run_dbt_and_capture, write_file 6 | 7 | distributed_table_model_schema_yml = """ 8 | version: 2 9 | models: 10 | - name: my_model 11 | config: 12 | materialized: distributed_table 13 | grants: 14 | select: ["{{ env_var('DBT_TEST_USER_1') }}"] 15 | insert: ["{{ env_var('DBT_TEST_USER_2') }}"] 16 | """ 17 | 18 | 19 | class TestDistributedTableModelGrants(BaseModelGrants): 20 | @pytest.mark.skipif( 21 | os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' 22 | ) 23 | def test_view_table_grants(self, project, get_test_users): 24 | # we want the test to fail, not
silently skip 25 | test_users = get_test_users 26 | select_privilege_name = self.privilege_grantee_name_overrides()["select"] 27 | insert_privilege_name = self.privilege_grantee_name_overrides()["insert"] 28 | assert len(test_users) == 3 29 | # Distributed Table materialization, single select grant 30 | updated_yaml = self.interpolate_name_overrides(distributed_table_model_schema_yml) 31 | write_file(updated_yaml, project.project_root, "models", "schema.yml") 32 | (results, log_output) = run_dbt_and_capture(["--debug", "run"]) 33 | assert len(results) == 1 34 | manifest = get_manifest(project.project_root) 35 | model_id = "model.test.my_model" 36 | model = manifest.nodes[model_id] 37 | assert model.config.materialized == "distributed_table" 38 | expected = {select_privilege_name: [test_users[0]], insert_privilege_name: [test_users[1]]} 39 | self.assert_expected_grants_match_actual(project, "my_model", expected) 40 | 41 | def assert_expected_grants_match_actual(self, project, relation_name, expected_grants): 42 | super().assert_expected_grants_match_actual(project, relation_name, expected_grants) 43 | 44 | # also needs grants for local table 45 | actual_local_grants = self.get_grants_on_relation(project, relation_name + "_local") 46 | from dbt.context.base import BaseContext 47 | 48 | diff_a_local = BaseContext.diff_of_two_dicts(actual_local_grants, expected_grants) 49 | diff_b_local = BaseContext.diff_of_two_dicts(expected_grants, actual_local_grants) 50 | assert diff_a_local == diff_b_local == {} 51 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_incremental_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants 2 | 3 | 4 | class TestIncrementalGrants(BaseIncrementalGrants): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_invalid_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants 2 | 3 | 4 | class TestInvalidGrants(BaseInvalidGrants): 5 | def grantee_does_not_exist_error(self): 6 | return "511" 7 | 8 | # ClickHouse doesn't give a very specific error for an invalid privilege 9 | def privilege_does_not_exist_error(self): 10 | return "Syntax error" 11 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_model_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants 2 | 3 | 4 | class TestModelGrants(BaseModelGrants): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_seed_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants 2 | 3 | 4 | class TestSeedGrants(BaseSeedGrants): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/grants/test_snapshot_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants 2 | 3 | 4 | class 
TestSnapshotGrants(BaseSnapshotGrants): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/integration/adapter/hooks/test_model_hooks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.exceptions import CompilationError 3 | from dbt.tests.adapter.hooks.fixtures import models__hooks_error 4 | from dbt.tests.util import run_dbt 5 | 6 | 7 | class TestDuplicateHooksInConfigs: 8 | @pytest.fixture(scope="class") 9 | def models(self): 10 | return {"hooks.sql": models__hooks_error} 11 | 12 | def test_run_duplicate_hook_defs(self, project): 13 | with pytest.raises(CompilationError) as exc: 14 | run_dbt() 15 | assert "pre_hook" in str(exc.value) 16 | assert "pre-hook" in str(exc.value) 17 | -------------------------------------------------------------------------------- /tests/integration/adapter/incremental/test_incremental_microbatch.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.incremental.test_incremental_microbatch import BaseMicrobatch 3 | 4 | _input_model_sql = """ 5 | {{ 6 | config( 7 | materialized='table', 8 | event_time='event_time' 9 | ) 10 | }} 11 | 12 | select 1 as id, toDateTime('2020-01-01 00:00:00') as event_time 13 | union all 14 | select 2 as id, toDateTime('2020-01-02 00:00:00') as event_time 15 | union all 16 | select 3 as id, toDateTime('2020-01-03 00:00:00') as event_time 17 | """ 18 | 19 | _microbatch_model_sql = """ 20 | {{ 21 | config( 22 | materialized='incremental', 23 | incremental_strategy='microbatch', 24 | unique_key='id', 25 | event_time='event_time', 26 | batch_size='day', 27 | begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0) 28 | ) 29 | }} 30 | 31 | select * from {{ ref('input_model') }} 32 | """ 33 | 34 | 35 | class TestMicrobatchIncremental(BaseMicrobatch): 36 | @pytest.fixture(scope="class") 37 | def models(self): 38 | return { 39 | "input_model.sql": _input_model_sql, 40 | "microbatch_model.sql": _microbatch_model_sql, 41 | } 42 | 43 | @pytest.fixture(scope="class") 44 | def insert_two_rows_sql(self, project) -> str: 45 | test_schema_relation = project.adapter.Relation.create( 46 | database=project.database, schema=project.test_schema 47 | ) 48 | return f"insert into {test_schema_relation}.input_model (id, event_time) values (4, '2020-01-04 00:00:00'), (5, '2020-01-05 00:00:00')" 49 | -------------------------------------------------------------------------------- /tests/integration/adapter/incremental/test_incremental_predicates.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.incremental.test_incremental_predicates import BaseIncrementalPredicates 2 | 3 | 4 | class TestIncrementalPredicates(BaseIncrementalPredicates): 5 | def test__incremental_predicates(self, project, ch_test_version): 6 | if ch_test_version.startswith('22.3'): 7 | return # lightweight deletes not supported in 22.3 8 | super().test__incremental_predicates(project) 9 | -------------------------------------------------------------------------------- /tests/integration/adapter/incremental/test_incremental_unique_key.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.incremental.test_incremental_unique_id import ( 3 | BaseIncrementalUniqueKey, 4 | models__duplicated_unary_unique_key_list_sql, 5 | models__empty_str_unique_key_sql, 6 | 
models__empty_unique_key_list_sql, 7 | models__no_unique_key_sql, 8 | models__nontyped_trinary_unique_key_list_sql, 9 | models__not_found_unique_key_list_sql, 10 | models__not_found_unique_key_sql, 11 | models__str_unique_key_sql, 12 | models__trinary_unique_key_list_sql, 13 | models__unary_unique_key_list_sql, 14 | ) 15 | 16 | models__expected__one_str__overwrite_sql = """ 17 | {{ 18 | config( 19 | materialized='table' 20 | ) 21 | }} 22 | 23 | select 24 | 'CT' as state, 25 | 'Hartford' as county, 26 | 'Hartford' as city, 27 | toDate('2022-02-14') as last_visit_date 28 | union all 29 | select 'MA','Suffolk','Boston',toDate('2020-02-12') 30 | union all 31 | select 'NJ','Mercer','Trenton',toDate('2022-01-01') 32 | union all 33 | select 'NY','Kings','Brooklyn',toDate('2021-04-02') 34 | union all 35 | select 'NY','New York','Manhattan',toDate('2021-04-01') 36 | union all 37 | select 'PA','Philadelphia','Philadelphia',toDate('2021-05-21') 38 | union all 39 | select 'CO','Denver','',toDate('2021-06-18') 40 | """ 41 | 42 | 43 | models__expected__unique_key_list__inplace_overwrite_sql = """ 44 | {{ 45 | config( 46 | materialized='table' 47 | ) 48 | }} 49 | 50 | select 51 | 'CT' as state, 52 | 'Hartford' as county, 53 | 'Hartford' as city, 54 | toDate('2022-02-14') as last_visit_date 55 | union all 56 | select 'MA','Suffolk','Boston',toDate('2020-02-12') 57 | union all 58 | select 'NJ','Mercer','Trenton',toDate('2022-01-01') 59 | union all 60 | select 'NY','Kings','Brooklyn',toDate('2021-04-02') 61 | union all 62 | select 'NY','New York','Manhattan',toDate('2021-04-01') 63 | union all 64 | select 'PA','Philadelphia','Philadelphia',toDate('2021-05-21') 65 | union all 66 | select 'CO','Denver','',toDate('2021-06-18') 67 | """ 68 | 69 | 70 | class TestIncrementalUniqueKey(BaseIncrementalUniqueKey): 71 | @pytest.fixture(scope="class") 72 | def models(self): 73 | return { 74 | "trinary_unique_key_list.sql": models__trinary_unique_key_list_sql, 75 | "nontyped_trinary_unique_key_list.sql": models__nontyped_trinary_unique_key_list_sql, 76 | "unary_unique_key_list.sql": models__unary_unique_key_list_sql, 77 | "not_found_unique_key.sql": models__not_found_unique_key_sql, 78 | "empty_unique_key_list.sql": models__empty_unique_key_list_sql, 79 | "no_unique_key.sql": models__no_unique_key_sql, 80 | "empty_str_unique_key.sql": models__empty_str_unique_key_sql, 81 | "str_unique_key.sql": models__str_unique_key_sql, 82 | "duplicated_unary_unique_key_list.sql": models__duplicated_unary_unique_key_list_sql, 83 | "not_found_unique_key_list.sql": models__not_found_unique_key_list_sql, 84 | "expected": { 85 | "one_str__overwrite.sql": models__expected__one_str__overwrite_sql, 86 | "unique_key_list__inplace_overwrite.sql": models__expected__unique_key_list__inplace_overwrite_sql, 87 | }, 88 | } 89 | -------------------------------------------------------------------------------- /tests/integration/adapter/materialized_view/test_refreshable_materialized_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | test refreshable materialized view creation. 
This is ClickHouse specific; the ClickHouse implementation 3 | of materialized views differs significantly from PostgreSQL or Oracle 4 | """ 5 | 6 | import json 7 | 8 | import pytest 9 | from dbt.tests.util import check_relation_types, run_dbt 10 | 11 | PEOPLE_SEED_CSV = """ 12 | id,name,age,department 13 | 1231,Dade,33,engineering 14 | 6666,Ksenia,48,engineering 15 | 8888,Kate,50,engineering 16 | 1000,Alfie,10,sales 17 | 2000,Bill,20,sales 18 | 3000,Charlie,30,sales 19 | """.lstrip() 20 | 21 | # This model is parameterized, in a way, by the "run_type" dbt project variable 22 | # This is to be able to switch between different model definitions within 23 | # the same test run and allow us to test the evolution of a materialized view 24 | MV_MODEL = """ 25 | {{ config( 26 | materialized='materialized_view', 27 | engine='MergeTree()', 28 | order_by='(department)', 29 | refreshable=( 30 | { 31 | "interval": "EVERY 2 MINUTE", 32 | "depends_on": ['depend_on_model'], 33 | "depends_on_validation": True 34 | } if var('run_type', '') == 'validate_depends_on' else { 35 | "interval": "EVERY 2 MINUTE" 36 | } 37 | ) 38 | ) 39 | }} 40 | select 41 | department, 42 | avg(age) as average 43 | from {{ source('raw', 'people') }} 44 | group by department 45 | """ 46 | 47 | SEED_SCHEMA_YML = """ 48 | version: 2 49 | 50 | sources: 51 | - name: raw 52 | schema: "{{ target.schema }}" 53 | tables: 54 | - name: people 55 | """ 56 | 57 | 58 | class TestBasicRefreshableMV: 59 | @pytest.fixture(scope="class") 60 | def seeds(self): 61 | """ 62 | we need a base table to pull from 63 | """ 64 | return { 65 | "people.csv": PEOPLE_SEED_CSV, 66 | "schema.yml": SEED_SCHEMA_YML, 67 | } 68 | 69 | @pytest.fixture(scope="class") 70 | def models(self): 71 | return { 72 | "hackers.sql": MV_MODEL, 73 | } 74 | 75 | def test_create(self, project): 76 | """ 77 | 1. create a base table via dbt seed 78 | 2. create a model as a refreshable materialized view, selecting from the table created in (1) 79 | 3. check system.view_refreshes for the view's existence 80 | """ 81 | results = run_dbt(["seed"]) 82 | assert len(results) == 1 83 | columns = project.run_sql(f"DESCRIBE TABLE {project.test_schema}.people", fetch="all") 84 | assert columns[0][1] == "Int32" 85 | 86 | # create the model 87 | results = run_dbt() 88 | assert len(results) == 1 89 | 90 | columns = project.run_sql(f"DESCRIBE TABLE hackers", fetch="all") 91 | assert columns[0][1] == "String" 92 | 93 | columns = project.run_sql(f"DESCRIBE hackers_mv", fetch="all") 94 | assert columns[0][1] == "String" 95 | 96 | check_relation_types( 97 | project.adapter, 98 | { 99 | "hackers_mv": "view", 100 | "hackers": "table", 101 | }, 102 | ) 103 | 104 | result = project.run_sql( 105 | f"select database, view, status from system.view_refreshes where database= '{project.test_schema}' and view='hackers_mv'", 106 | fetch="all", 107 | ) 108 | assert result[0][2] == 'Scheduled' 109 | 110 | def test_validate_dependency(self, project): 111 | """ 112 | 1. create a base table via dbt seed 113 | 2. create a refreshable mv model with a nonexistent dependency and validation config, selecting from the table created in (1) 114 | 3.
make sure we get an error 115 | """ 116 | results = run_dbt(["seed"]) 117 | assert len(results) == 1 118 | columns = project.run_sql(f"DESCRIBE TABLE {project.test_schema}.people", fetch="all") 119 | assert columns[0][1] == "Int32" 120 | 121 | # re-run dbt but this time with the new MV SQL 122 | run_vars = {"run_type": "validate_depends_on"} 123 | result = run_dbt(["run", "--vars", json.dumps(run_vars)], False) 124 | assert result[0].status == 'error' 125 | assert 'No existing MV found matching MV' in result[0].message 126 | -------------------------------------------------------------------------------- /tests/integration/adapter/persist_docs/fixtures.py: -------------------------------------------------------------------------------- 1 | _MODELS__VIEW = """ 2 | {{ config(materialized='view') }} 3 | select 2 as id, 'Bob' as name 4 | """ 5 | 6 | _MODELS__NO_DOCS_MODEL = """ 7 | select 1 as id, 'Alice' as name 8 | """ 9 | 10 | _DOCS__MY_FUN_DOCS = """ 11 | {% docs my_fun_doc %} 12 | name Column description "with double quotes" 13 | and with 'single quotes' as welll as other; 14 | '''abc123''' 15 | reserved -- characters 16 | -- 17 | /* comment */ 18 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 19 | 20 | {% enddocs %} 21 | """ 22 | 23 | _MODELS__TABLE = """ 24 | {{ config(materialized='table') }} 25 | select 1 as id, 'Joe' as name 26 | """ 27 | 28 | 29 | _MODELS__MISSING_COLUMN = """ 30 | {{ config(materialized='table') }} 31 | select 1 as id, 'Ed' as name 32 | """ 33 | 34 | _MODELS__MODEL_USING_QUOTE_UTIL = """ 35 | select 1 as {{ adapter.quote("2id") }} 36 | """ 37 | 38 | _PROPERTIES__QUOTE_MODEL = """ 39 | version: 2 40 | models: 41 | - name: quote_model 42 | description: "model to test column quotes and comments" 43 | columns: 44 | - name: 2id 45 | description: "XXX My description" 46 | quote: true 47 | """ 48 | 49 | _PROPERITES__SCHEMA_MISSING_COL = """ 50 | version: 2 51 | models: 52 | - name: missing_column 53 | columns: 54 | - name: id 55 | description: "test id column description" 56 | - name: column_that_does_not_exist 57 | description: "comment that cannot be created" 58 | """ 59 | 60 | _PROPERTIES__SCHEMA_YML = """ 61 | version: 2 62 | 63 | models: 64 | - name: table_model 65 | description: | 66 | Table model description "with double quotes" 67 | and with 'single quotes' as welll as other; 68 | '''abc123''' 69 | reserved -- characters 70 | -- 71 | /* comment */ 72 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 73 | columns: 74 | - name: id 75 | description: | 76 | id Column description "with double quotes" 77 | and with 'single quotes' as welll as other; 78 | '''abc123''' 79 | reserved -- characters 80 | -- 81 | /* comment */ 82 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 83 | - name: name 84 | description: | 85 | Some stuff here and then a call to 86 | {{ doc('my_fun_doc')}} 87 | - name: view_model 88 | description: | 89 | View model description "with double quotes" 90 | and with 'single quotes' as welll as other; 91 | '''abc123''' 92 | reserved -- characters 93 | -- 94 | /* comment */ 95 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 96 | columns: 97 | - name: id 98 | description: | 99 | id Column description "with double quotes" 100 | and with 'single quotes' as welll as other; 101 | '''abc123''' 102 | reserved -- characters 103 | -- 104 | /* comment */ 105 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 106 | 107 | seeds: 108 | - name: seed 109 | description: | 110 | Seed model description "with 
double quotes" 111 | and with 'single quotes' as welll as other; 112 | '''abc123''' 113 | reserved -- characters 114 | -- 115 | /* comment */ 116 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 117 | columns: 118 | - name: id 119 | description: | 120 | id Column description "with double quotes" 121 | and with 'single quotes' as welll as other; 122 | '''abc123''' 123 | reserved -- characters 124 | -- 125 | /* comment */ 126 | Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting 127 | - name: name 128 | description: | 129 | Some stuff here and then a call to 130 | {{ doc('my_fun_doc')}} 131 | """ 132 | 133 | 134 | _SEEDS__SEED = """id,name 135 | 1,Alice 136 | 2,Bob 137 | """ 138 | -------------------------------------------------------------------------------- /tests/integration/adapter/persist_docs/test_persist_docs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | from dbt.tests.util import run_dbt 6 | from fixtures import ( 7 | _DOCS__MY_FUN_DOCS, 8 | _MODELS__MISSING_COLUMN, 9 | _MODELS__MODEL_USING_QUOTE_UTIL, 10 | _MODELS__NO_DOCS_MODEL, 11 | _MODELS__TABLE, 12 | _MODELS__VIEW, 13 | _PROPERITES__SCHEMA_MISSING_COL, 14 | _PROPERTIES__QUOTE_MODEL, 15 | _PROPERTIES__SCHEMA_YML, 16 | _SEEDS__SEED, 17 | ) 18 | 19 | 20 | class BasePersistDocsBase: 21 | @pytest.fixture(scope="class", autouse=True) 22 | def setUp(self, project): 23 | run_dbt(["seed"]) 24 | run_dbt() 25 | 26 | @pytest.fixture(scope="class") 27 | def seeds(self): 28 | return {"seed.csv": _SEEDS__SEED} 29 | 30 | @pytest.fixture(scope="class") 31 | def models(self): 32 | return { 33 | "no_docs_model.sql": _MODELS__NO_DOCS_MODEL, 34 | "table_model.sql": _MODELS__TABLE, 35 | "view_model.sql": _MODELS__VIEW, 36 | } 37 | 38 | @pytest.fixture(scope="class") 39 | def properties(self): 40 | return { 41 | "my_fun_docs.md": _DOCS__MY_FUN_DOCS, 42 | "schema.yml": _PROPERTIES__SCHEMA_YML, 43 | } 44 | 45 | def _assert_common_comments(self, *comments): 46 | for comment in comments: 47 | assert '"with double quotes"' in comment 48 | assert """'''abc123'''""" in comment 49 | assert "\n" in comment 50 | assert "Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting" in comment 51 | assert "/* comment */" in comment 52 | if os.name == "nt": 53 | assert "--\r\n" in comment or "--\n" in comment 54 | else: 55 | assert "--\n" in comment 56 | 57 | def _assert_has_table_comments(self, table_node): 58 | table_comment = table_node["metadata"]["comment"] 59 | assert table_comment.startswith("Table model description") 60 | 61 | table_id_comment = table_node["columns"]["id"]["comment"] 62 | assert table_id_comment.startswith("id Column description") 63 | 64 | table_name_comment = table_node["columns"]["name"]["comment"] 65 | assert table_name_comment.startswith("Some stuff here and then a call to") 66 | 67 | self._assert_common_comments(table_comment, table_id_comment, table_name_comment) 68 | 69 | def _assert_has_view_comments( 70 | self, view_node, has_node_comments=True, has_column_comments=True 71 | ): 72 | view_comment = view_node["metadata"]["comment"] 73 | if has_node_comments: 74 | assert view_comment.startswith("View model description") 75 | self._assert_common_comments(view_comment) 76 | else: 77 | assert view_comment is None 78 | 79 | view_id_comment = view_node["columns"]["id"]["comment"] 80 | if has_column_comments: 81 | assert view_id_comment.startswith("id Column description") 82 | self._assert_common_comments(view_id_comment) 
83 | else: 84 | assert view_id_comment is None 85 | 86 | view_name_comment = view_node["columns"]["name"]["comment"] 87 | assert view_name_comment is None 88 | 89 | 90 | class BasePersistDocs(BasePersistDocsBase): 91 | @pytest.fixture(scope="class") 92 | def project_config_update(self): 93 | return { 94 | "models": { 95 | "test": { 96 | "+persist_docs": { 97 | "relation": True, 98 | "columns": True, 99 | }, 100 | } 101 | } 102 | } 103 | 104 | def test_has_comments_pg_like(self): 105 | if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): 106 | pytest.skip('Not running comment test for cloud') 107 | run_dbt(["docs", "generate"]) 108 | with open("target/catalog.json") as fp: 109 | catalog_data = json.load(fp) 110 | assert "nodes" in catalog_data 111 | assert len(catalog_data["nodes"]) == 4 112 | table_node = catalog_data["nodes"]["model.test.table_model"] 113 | self._assert_has_table_comments(table_node) 114 | 115 | view_node = catalog_data["nodes"]["model.test.view_model"] 116 | self._assert_has_view_comments(view_node) 117 | 118 | no_docs_node = catalog_data["nodes"]["model.test.no_docs_model"] 119 | self._assert_has_view_comments(no_docs_node, False, False) 120 | 121 | 122 | class BasePersistDocsColumnMissing(BasePersistDocsBase): 123 | @pytest.fixture(scope="class") 124 | def project_config_update(self): 125 | return { 126 | "models": { 127 | "test": { 128 | "+persist_docs": { 129 | "columns": True, 130 | }, 131 | } 132 | } 133 | } 134 | 135 | @pytest.fixture(scope="class") 136 | def models(self): 137 | return {"missing_column.sql": _MODELS__MISSING_COLUMN} 138 | 139 | @pytest.fixture(scope="class") 140 | def properties(self): 141 | return {"schema.yml": _PROPERITES__SCHEMA_MISSING_COL} 142 | 143 | def test_missing_column(self, project): 144 | run_dbt(["docs", "generate"]) 145 | with open("target/catalog.json") as fp: 146 | catalog_data = json.load(fp) 147 | assert "nodes" in catalog_data 148 | 149 | table_node = catalog_data["nodes"]["model.test.missing_column"] 150 | table_id_comment = table_node["columns"]["id"]["comment"] 151 | assert table_id_comment.startswith("test id column description") 152 | 153 | 154 | class BasePersistDocsCommentOnQuotedColumn: 155 | """Covers edge case where column with comment must be quoted. 
156 | We set this using the `quote:` tag in the property file.""" 157 | 158 | @pytest.fixture(scope="class") 159 | def models(self): 160 | return {"quote_model.sql": _MODELS__MODEL_USING_QUOTE_UTIL} 161 | 162 | @pytest.fixture(scope="class") 163 | def properties(self): 164 | return {"properties.yml": _PROPERTIES__QUOTE_MODEL} 165 | 166 | @pytest.fixture(scope="class") 167 | def project_config_update(self): 168 | return { 169 | "models": { 170 | "test": { 171 | "materialized": "table", 172 | "+persist_docs": { 173 | "relation": True, 174 | "columns": True, 175 | }, 176 | } 177 | } 178 | } 179 | 180 | @pytest.fixture(scope="class") 181 | def run_has_comments(self, project): 182 | if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): 183 | pytest.skip('Not running comment test for cloud') 184 | 185 | def fixt(): 186 | run_dbt() 187 | run_dbt(["docs", "generate"]) 188 | with open("target/catalog.json") as fp: 189 | catalog_data = json.load(fp) 190 | assert "nodes" in catalog_data 191 | assert len(catalog_data["nodes"]) == 1 192 | column_node = catalog_data["nodes"]["model.test.quote_model"] 193 | column_comment = column_node["columns"]["2id"]["comment"] 194 | assert column_comment.startswith("XXX") 195 | 196 | return fixt 197 | 198 | def test_quoted_column_comments(self, run_has_comments): 199 | run_has_comments() 200 | 201 | 202 | """ 203 | Code from tests is a copy from dbt-core/tests/adapter/dbt/tests/adapter/persist_docs 204 | It can be removed after upgrading to 1.5.0 205 | """ 206 | 207 | 208 | class TestPersistDocs(BasePersistDocs): 209 | pass 210 | 211 | 212 | class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing): 213 | pass 214 | 215 | 216 | class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn): 217 | pass 218 | -------------------------------------------------------------------------------- /tests/integration/adapter/query_comment/test_query_comment.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.query_comment.test_query_comment import ( 2 | BaseEmptyQueryComments, 3 | BaseMacroArgsQueryComments, 4 | BaseMacroInvalidQueryComments, 5 | BaseMacroQueryComments, 6 | BaseNullQueryComments, 7 | BaseQueryComments, 8 | ) 9 | 10 | 11 | class TestQueryComments(BaseQueryComments): 12 | pass 13 | 14 | 15 | class TestMacroQueryComments(BaseMacroQueryComments): 16 | pass 17 | 18 | 19 | class TestMacroArgsQueryComments(BaseMacroArgsQueryComments): 20 | pass 21 | 22 | 23 | class TestMacroInvalidQueryComments(BaseMacroInvalidQueryComments): 24 | pass 25 | 26 | 27 | class TestNullQueryComments(BaseNullQueryComments): 28 | pass 29 | 30 | 31 | class TestEmptyQueryComments(BaseEmptyQueryComments): 32 | pass 33 | -------------------------------------------------------------------------------- /tests/integration/adapter/query_settings/test_query_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | nullable_column_model = """ 5 | {{ 6 | config( 7 | materialized='table', 8 | query_settings={ 9 | 'join_use_nulls': 1 10 | } 11 | ) 12 | }} 13 | select t2.id as test_id 14 | from (select 1 as id) t1 15 | left join (select 2 as id) t2 16 | on t1.id=t2.id 17 | """ 18 | 19 | 20 | class TestNullableColumnJoin: 21 | @pytest.fixture(scope="class") 22 | def models(self): 23 | return { 24 | "nullable_column_model.sql": nullable_column_model, 25 | } 26 | 27 | def test_nullable_column_join(self, 
project):
28 |         run_dbt(["run", "--select", "nullable_column_model"])
29 |         result = project.run_sql(
30 |             "select isNullable(test_id) as is_nullable_column from nullable_column_model",
31 |             fetch="one",
32 |         )
33 |         assert result[0] == 1
34 |
35 |
36 | not_nullable_column_model = """
37 | {{
38 |     config(
39 |         materialized='table',
40 |         query_settings={
41 |             'join_use_nulls': 0
42 |         }
43 |     )
44 | }}
45 | select t2.id as test_id
46 | from (select 1 as id) t1
47 | left join (select 2 as id) t2
48 | on t1.id=t2.id
49 | """
50 |
51 |
52 | class TestNotNullableColumnJoin:
53 |     @pytest.fixture(scope="class")
54 |     def models(self):
55 |         return {
56 |             "not_nullable_column_model.sql": not_nullable_column_model,
57 |         }
58 |
59 |     def test_not_nullable_column_join(self, project):
60 |         run_dbt(["run", "--select", "not_nullable_column_model"])
61 |         result = project.run_sql(
62 |             "select isNullable(test_id) as is_nullable_column from not_nullable_column_model",
63 |             fetch="one",
64 |         )
65 |         assert result[0] == 0
66 |
-------------------------------------------------------------------------------- /tests/integration/adapter/relations/test_changing_relation_type.py: --------------------------------------------------------------------------------
1 | from dbt.tests.adapter.relations.test_changing_relation_type import BaseChangeRelationTypeValidator
2 |
3 |
4 | class TestChangeRelationTypes(BaseChangeRelationTypeValidator):
5 |     pass
6 |
-------------------------------------------------------------------------------- /tests/integration/adapter/replicated_database/test_replicated_database.py: --------------------------------------------------------------------------------
1 | import pytest
2 | from dbt.tests.adapter.basic.files import model_incremental, schema_base_yml
3 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations
4 | from dbt.tests.adapter.basic.test_incremental import BaseIncremental
5 |
6 |
7 | class TestReplicatedDatabaseSimpleMaterialization(BaseSimpleMaterializations):
8 |     """Contains tests for table, view and swappable view materializations."""
9 |
10 |     @pytest.fixture(scope="class")
11 |     def test_config(self, test_config):
12 |         test_config["db_engine"] = (
13 |             "Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"
14 |         )
15 |         return test_config
16 |
17 |
18 | class TestReplicatedDatabaseIncremental(BaseIncremental):
19 |     @pytest.fixture(scope="class")
20 |     def test_config(self, test_config):
21 |         test_config["db_engine"] = (
22 |             "Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"
23 |         )
24 |         return test_config
25 |
26 |     @pytest.fixture(scope="class")
27 |     def models(self):
28 |         config_materialized_incremental = """
29 |         {{ config(order_by='(some_date, id, name)', inserts_only=True, materialized='incremental', unique_key='id') }}
30 |         """
31 |         incremental_sql = config_materialized_incremental + model_incremental
32 |         return {
33 |             "incremental.sql": incremental_sql,
34 |             "schema.yml": schema_base_yml,
35 |         }
36 |
-------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_array.py: --------------------------------------------------------------------------------
1 | import pytest
2 | from dbt.tests.adapter.utils.base_array_utils import BaseArrayUtils
3 | from dbt.tests.adapter.utils.fixture_array_append import models__array_append_actual_sql
4 | from dbt.tests.adapter.utils.fixture_array_concat import models__array_concat_actual_sql
5 |
6 | # Empty arrays are constructed with the DBT default
"integer" which is an Int32. Because ClickHouse will coerce 7 | # the arrays to the smallest possible type, we need to ensure that at least one of the members requires an Int32 8 | models__array_append_expected_sql = """ 9 | select 1 as id, {{ array_construct([1,2,3,-77777777]) }} as array_col union all 10 | select 2 as id, {{ array_construct([4]) }} as array_col 11 | """ 12 | 13 | 14 | class TestArrayAppend(BaseArrayUtils): 15 | @pytest.fixture(scope="class") 16 | def models(self): 17 | return { 18 | "actual.sql": models__array_append_actual_sql, 19 | "expected.sql": models__array_append_expected_sql, 20 | } 21 | 22 | 23 | models__array_concat_expected_sql = """ 24 | select 1 as id, {{ array_construct([1,2,3,4,5,-77777777]) }} as array_col union all 25 | select 2 as id, {{ array_construct([2]) }} as array_col union all 26 | select 3 as id, {{ array_construct([3]) }} as array_col 27 | """ 28 | 29 | 30 | class TestArrayConcat(BaseArrayUtils): 31 | @pytest.fixture(scope="class") 32 | def models(self): 33 | return { 34 | "actual.sql": models__array_concat_actual_sql, 35 | "expected.sql": models__array_concat_expected_sql, 36 | } 37 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_datatypes.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.data_types.base_data_type_macro import BaseDataTypeMacro 3 | from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean 4 | from dbt.tests.adapter.utils.data_types.test_type_float import BaseTypeFloat 5 | from dbt.tests.adapter.utils.data_types.test_type_int import BaseTypeInt 6 | from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric 7 | from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString 8 | from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp 9 | 10 | models__bigint_expected_sql = """ 11 | select -9223372036854775800 as bigint_col 12 | """ 13 | 14 | models__bigint_actual_sql = """ 15 | select cast('-9223372036854775800' as {{ type_bigint() }}) as bigint_col 16 | """ 17 | 18 | 19 | class TestTypeBigInt(BaseDataTypeMacro): 20 | # Using negative numbers instead since BIGINT on ClickHouse is signed, but the SELECT without a sign 21 | # will be automatically cast to a UInt64 22 | @pytest.fixture(scope="class") 23 | def models(self): 24 | return { 25 | "expected.sql": models__bigint_expected_sql, 26 | "actual.sql": self.interpolate_macro_namespace( 27 | models__bigint_actual_sql, "type_bigint" 28 | ), 29 | } 30 | 31 | 32 | class TestTypeBoolean(BaseTypeBoolean): 33 | pass 34 | 35 | 36 | class TestTypeFloat(BaseTypeFloat): 37 | pass 38 | 39 | 40 | class TestTypeInt(BaseTypeInt): 41 | pass 42 | 43 | 44 | class TestTypeNumeric(BaseTypeNumeric): 45 | pass 46 | 47 | 48 | class TestTypeString(BaseTypeString): 49 | pass 50 | 51 | 52 | class TestTypeTimestamp(BaseTypeTimestamp): 53 | pass 54 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_dateadd.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 3 | 4 | # We remove the null row from this test because (1) nullables work fine with Nullable ClickHouse types, but 5 | # (2) dealing with all the type conversions is ugly 6 | seeds__data_dateadd_csv = 
"""from_time,interval_length,datepart,result 7 | 2018-01-01 01:00:00,1,day,2018-01-02 01:00:00 8 | 2018-01-01 01:00:00,1,month,2018-02-01 01:00:00 9 | 2018-01-01 01:00:00,1,year,2019-01-01 01:00:00 10 | 2018-01-01 01:00:00,1,hour,2018-01-01 02:00:00 11 | """ 12 | 13 | 14 | class TestDateAdd(BaseDateAdd): 15 | @pytest.fixture(scope="class") 16 | def seeds(self): 17 | return {"data_dateadd.csv": seeds__data_dateadd_csv} 18 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_datediff.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.fixture_datediff import models__test_datediff_yml 3 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 4 | 5 | # Not testing null values (see discussion in test_dateadd.py 6 | seeds__data_datediff_csv = """first_date,second_date,datepart,result 7 | 2018-01-01 01:00:00,2018-01-02 01:00:00,day,1 8 | 2018-01-01 01:00:00,2018-02-01 01:00:00,month,1 9 | 2018-01-01 01:00:00,2019-01-01 01:00:00,year,1 10 | 2018-01-01 01:00:00,2018-01-01 02:00:00,hour,1 11 | 2018-01-01 01:00:00,2018-01-01 02:01:00,minute,61 12 | 2018-01-01 01:00:00,2018-01-01 02:00:01,second,3601 13 | 2019-12-31 00:00:00,2019-12-27 00:00:00,week,-1 14 | 2019-12-31 00:00:00,2019-12-30 00:00:00,week,0 15 | 2019-12-31 00:00:00,2020-01-02 00:00:00,week,0 16 | 2019-12-31 00:00:00,2020-01-06 02:00:00,week,1 17 | """ 18 | 19 | # Casting strings to dates in ClickHouse requires either a specific format or the use of a function, so skipping 20 | # that for now (also micro and milliseconds are not supported) 21 | models__test_datediff_sql = """ 22 | with data as ( 23 | select * from {{ ref('data_datediff') }} 24 | ) 25 | select 26 | case 27 | when datepart = 'second' then {{ datediff('first_date', 'second_date', 'second') }} 28 | when datepart = 'minute' then {{ datediff('first_date', 'second_date', 'minute') }} 29 | when datepart = 'hour' then {{ datediff('first_date', 'second_date', 'hour') }} 30 | when datepart = 'day' then {{ datediff('first_date', 'second_date', 'day') }} 31 | when datepart = 'week' then {{ datediff('first_date', 'second_date', 'week') }} 32 | when datepart = 'month' then {{ datediff('first_date', 'second_date', 'month') }} 33 | when datepart = 'year' then {{ datediff('first_date', 'second_date', 'year') }} 34 | else null 35 | end as actual, 36 | result as expected 37 | from data 38 | """ 39 | 40 | 41 | class TestDateDiff(BaseDateDiff): 42 | @pytest.fixture(scope="class") 43 | def seeds(self): 44 | return {"data_datediff.csv": seeds__data_datediff_csv} 45 | 46 | @pytest.fixture(scope="class") 47 | def models(self): 48 | return { 49 | "test_datediff.yml": models__test_datediff_yml, 50 | "test_datediff.sql": self.interpolate_macro_namespace( 51 | models__test_datediff_sql, "datediff" 52 | ), 53 | } 54 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_last_day.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.test_last_day import BaseLastDay 3 | 4 | 5 | class TestLastDay(BaseLastDay): 6 | @pytest.fixture(scope="class") 7 | def project_config_update(self): 8 | return { 9 | "name": "test", 10 | "seeds": { 11 | "test": { 12 | "data_last_day": { 13 | "+column_types": { 14 | "date_day": "Nullable(Date)", 15 | "result": "Nullable(Date)", 16 | }, 17 | }, 18 | }, 19 | }, 20 | } 21 | 
-------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_listagg.py: --------------------------------------------------------------------------------
1 | import pytest
2 | from dbt.tests.adapter.utils.fixture_listagg import (
3 |     models__test_listagg_yml,
4 |     seeds__data_listagg_csv,
5 | )
6 | from dbt.tests.adapter.utils.test_listagg import BaseListagg
7 | from dbt.tests.util import run_dbt
8 |
9 | models__test_listagg_custom_sql = """
10 | select
11 |     group_col,
12 |     {{ listagg('string_text', "'_|_'", "ORDER BY order_col DESC", 2) }} as actual,
13 |     'top_ordered_limited' as version
14 | from {{ ref('data_listagg') }} group by group_col
15 | """
16 |
17 |
18 | class TestListagg(BaseListagg):
19 |     pass
20 |
21 |
22 | class TestCustomListagg:
23 |     @pytest.fixture(scope="class")
24 |     def seeds(self):
25 |         return {
26 |             "data_listagg.csv": seeds__data_listagg_csv,
27 |         }
28 |
29 |     @pytest.fixture(scope="class")
30 |     def models(self):
31 |         return {
32 |             "test_listagg.yaml": models__test_listagg_yml,
33 |             "test_listagg.sql": models__test_listagg_custom_sql,
34 |         }
35 |
36 |     def test_listagg_run(self, project):
37 |         run_dbt(["seed"])
38 |         run_dbt()
39 |         results = project.run_sql("select * from test_listagg", fetch="all")
40 |         assert len(results) == 3
41 |         assert results[0] == (3, 'g_|_g', 'top_ordered_limited')
42 |         assert results[1] == (2, 'p_|_a', 'top_ordered_limited')
43 |         assert results[2] == (1, 'c_|_b', 'top_ordered_limited')
44 |
-------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_replace.py: --------------------------------------------------------------------------------
1 | import pytest
2 | from dbt.tests.adapter.utils.fixture_replace import models__test_replace_yml
3 | from dbt.tests.adapter.utils.test_replace import BaseReplace
4 |
5 | # string_text in the WHERE clauses is an unquoted column reference; each branch
6 | # picks out the specific seed row it exercises
7 | models__test_replace_sql = """
8 | select
9 |
10 |     {{ replace('string_text', 'a', 'b') }} as actual,
11 |     result as expected
12 |
13 | from {{ ref('data_replace') }} WHERE string_text = 'a'
14 |
15 | UNION ALL
16 |
17 | select
18 |
19 |     {{ replace('string_text', 'http://', '') }} as actual,
20 |     result as expected
21 |
22 | from {{ ref('data_replace') }} WHERE string_text LIKE '%google%'
23 |
24 | """
25 |
26 |
27 | class TestReplace(BaseReplace):
28 |     @pytest.fixture(scope="class")
29 |     def models(self):
30 |         return {
31 |             "test_replace.yml": models__test_replace_yml,
32 |             "test_replace.sql": self.interpolate_macro_namespace(
33 |                 models__test_replace_sql, "replace"
34 |             ),
35 |         }
36 |
-------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_split_part.py: --------------------------------------------------------------------------------
1 | import pytest
2 | from dbt.tests.adapter.utils.fixture_split_part import models__test_split_part_yml
3 | from dbt.tests.adapter.utils.test_split_part import BaseSplitPart
4 |
5 | models__test_split_part_sql = """
6 | with data as (
7 |
8 |     select * from {{ ref('data_split_part') }}
9 |
10 | )
11 |
12 | select
13 |     {{ split_part('parts', "'|'", 1) }} as actual,
14 |     result_1 as expected
15 |
16 | from data
17 |
18 | union all
19 |
20 | select
21 |     {{ split_part('parts', "'|'", 2) }} as actual,
22 |     result_2 as expected
23 |
24 | from data
25 |
26 | union all
27 |
28 | select
29 |     {{ split_part('parts', "'|'", 3) }} as actual,
30 |     result_3 as expected
31 |
32 | from data
33 | """
34 |
35 |
36 | class
TestSplitPart(BaseSplitPart): 37 | @pytest.fixture(scope="class") 38 | def models(self): 39 | return { 40 | "test_split_part.yml": models__test_split_part_yml, 41 | "test_split_part.sql": models__test_split_part_sql, 42 | } 43 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_timestamps.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.test_timestamps import BaseCurrentTimestamps 3 | 4 | 5 | class TestCurrentTimestamps(BaseCurrentTimestamps): 6 | @pytest.fixture(scope="class") 7 | def expected_schema(self): 8 | return { 9 | "current_timestamp": "DateTime", 10 | "current_timestamp_in_utc_backcompat": "DateTime", 11 | "current_timestamp_backcompat": "DateTime", 12 | } 13 | 14 | @pytest.fixture(scope="class") 15 | def expected_sql(self): 16 | return None 17 | -------------------------------------------------------------------------------- /tests/integration/adapter/utils/test_unchanged.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.utils.test_any_value import BaseAnyValue 2 | from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct 3 | from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr 4 | from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText 5 | from dbt.tests.adapter.utils.test_concat import BaseConcat 6 | from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive 7 | from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc 8 | from dbt.tests.adapter.utils.test_escape_single_quotes import ( 9 | BaseEscapeSingleQuotesBackslash, 10 | BaseEscapeSingleQuotesQuote, 11 | ) 12 | from dbt.tests.adapter.utils.test_except import BaseExcept 13 | from dbt.tests.adapter.utils.test_hash import BaseHash 14 | from dbt.tests.adapter.utils.test_intersect import BaseIntersect 15 | from dbt.tests.adapter.utils.test_length import BaseLength 16 | from dbt.tests.adapter.utils.test_position import BasePosition 17 | from dbt.tests.adapter.utils.test_right import BaseRight 18 | from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast 19 | from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral 20 | 21 | 22 | class TestAnyValue(BaseAnyValue): 23 | pass 24 | 25 | 26 | class TestBaseBoolOr(BaseBoolOr): 27 | pass 28 | 29 | 30 | class TestCastBoolToText(BaseCastBoolToText): 31 | pass 32 | 33 | 34 | class TestConcat(BaseConcat): 35 | pass 36 | 37 | 38 | class TestDateTrunc(BaseDateTrunc): 39 | pass 40 | 41 | 42 | class TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote): 43 | pass 44 | 45 | 46 | class TestEscapeSingleQuotesBackslash(BaseEscapeSingleQuotesBackslash): 47 | pass 48 | 49 | 50 | class TestExcept(BaseExcept): 51 | pass 52 | 53 | 54 | class TestHash(BaseHash): 55 | pass 56 | 57 | 58 | class TestIntersect(BaseIntersect): 59 | pass 60 | 61 | 62 | class TestLength(BaseLength): 63 | pass 64 | 65 | 66 | class TestPosition(BasePosition): 67 | pass 68 | 69 | 70 | class TestRight(BaseRight): 71 | pass 72 | 73 | 74 | class TestSafeCast(BaseSafeCast): 75 | pass 76 | 77 | 78 | class TestStringLiteral(BaseStringLiteral): 79 | pass 80 | 81 | 82 | class TestCurrentTimestampNaive(BaseCurrentTimestampNaive): 83 | pass 84 | 85 | 86 | class TestArrayConstruct(BaseArrayConstruct): 87 | pass 88 | -------------------------------------------------------------------------------- 
/tests/integration/adapter/view/test_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test ClickHouse view materialization in dbt-clickhouse 3 | """ 4 | 5 | import json 6 | 7 | import pytest 8 | from dbt.tests.util import run_dbt 9 | 10 | PEOPLE_SEED_CSV = """ 11 | id,name,age,department 12 | 1231,Dade,33,engineering 13 | 6666,Ksenia,48,engineering 14 | 8888,Kate,50,engineering 15 | """.lstrip() 16 | 17 | PEOPLE_VIEW_MODEL = """ 18 | {{ config( 19 | materialized='view' 20 | ) }} 21 | 22 | {% if var('run_type', '') == '' %} 23 | select id, name, age from {{ source('raw', 'people') }} 24 | {% elif var('run_type', '') == 'update_view' %} 25 | select id, name, age, department from {{ source('raw', 'people') }} 26 | {% endif %} 27 | """ 28 | 29 | 30 | SEED_SCHEMA_YML = """ 31 | version: 2 32 | 33 | sources: 34 | - name: raw 35 | schema: "{{ target.schema }}" 36 | tables: 37 | - name: people 38 | """ 39 | 40 | 41 | class TestClickHouseView: 42 | @pytest.fixture(scope="class") 43 | def seeds(self): 44 | return { 45 | "people.csv": PEOPLE_SEED_CSV, 46 | "schema.yml": SEED_SCHEMA_YML, 47 | } 48 | 49 | @pytest.fixture(scope="class") 50 | def models(self): 51 | return {"people_view.sql": PEOPLE_VIEW_MODEL} 52 | 53 | def test_create_view(self, project): 54 | # Load seed data 55 | run_dbt(["seed"]) 56 | 57 | # Run dbt to create the view 58 | run_dbt() 59 | 60 | # Query the view and check if it returns expected data 61 | result = project.run_sql("SELECT COUNT(*) FROM people_view", fetch="one") 62 | assert result[0] == 3 # 3 records in the seed data 63 | 64 | # Run dbt again to apply the update 65 | run_dbt(["run", "--vars", json.dumps({"run_type": "update_view"})]) 66 | 67 | # Verify the new column is present 68 | result = project.run_sql("DESCRIBE TABLE people_view", fetch="all") 69 | columns = {row[0] for row in result} 70 | assert "department" in columns # New column should be present 71 | -------------------------------------------------------------------------------- /tests/integration/adapter/view/test_view_sql_security.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test ClickHouse view with sql security settings in dbt-clickhouse 3 | """ 4 | 5 | import os 6 | 7 | import pytest 8 | from dbt.tests.util import run_dbt, run_dbt_and_capture 9 | 10 | PEOPLE_SEED_CSV = """ 11 | id,name,age,department 12 | 1231,Dade,33,engineering 13 | 6666,Ksenia,48,engineering 14 | 8888,Kate,50,engineering 15 | """.lstrip() 16 | 17 | SEED_SCHEMA_YML = """ 18 | version: 2 19 | 20 | sources: 21 | - name: raw 22 | schema: "{{ target.schema }}" 23 | tables: 24 | - name: people 25 | """ 26 | 27 | PEOPLE_VIEW_CONFIG = """ 28 | {{ config( 29 | materialized='view', 30 | sql_security='invoker' 31 | ) }} 32 | """ 33 | 34 | PEOPLE_VIEW_CONFIG_2 = """ 35 | {{ config( 36 | materialized='view', 37 | sql_security='definer', 38 | definer='%s' 39 | ) }} 40 | """ 41 | 42 | PEOPLE_VIEW_CONFIG_3 = """ 43 | {{ config( 44 | materialized='view', 45 | sql_security='definer' 46 | ) }} 47 | """ 48 | 49 | PEOPLE_VIEW_CONFIG_4 = """ 50 | {{ config( 51 | materialized='view', 52 | sql_security='definer', 53 | definer='' 54 | ) }} 55 | """ 56 | 57 | PEOPLE_VIEW_CONFIG_5 = """ 58 | {{ config( 59 | materialized='view', 60 | sql_security='wrong' 61 | ) }} 62 | """ 63 | 64 | PEOPLE_VIEW_MODEL = """ 65 | select 66 | id, 67 | name, 68 | age, 69 | department 70 | from {{ source('raw', 'people') }} 71 | """ 72 | 73 | 74 | class 
TestClickHouseViewSqlSecurity:
75 |     @pytest.fixture(scope="class")
76 |     def seeds(self):
77 |         return {
78 |             "people.csv": PEOPLE_SEED_CSV,
79 |             "schema.yml": SEED_SCHEMA_YML,
80 |         }
81 |
82 |     @pytest.fixture(scope="class")
83 |     def models(self):
84 |         return {
85 |             "view_invoker.sql": PEOPLE_VIEW_CONFIG + PEOPLE_VIEW_MODEL,
86 |             "view_definer.sql": PEOPLE_VIEW_CONFIG_2 % os.environ.get('DBT_CH_TEST_USER', 'default')
87 |             + PEOPLE_VIEW_MODEL,
88 |             "view_definer_empty.sql": PEOPLE_VIEW_CONFIG_3 + PEOPLE_VIEW_MODEL,
89 |             "view_definer_wrong.sql": PEOPLE_VIEW_CONFIG_4 + PEOPLE_VIEW_MODEL,
90 |             "view_sql_security.sql": PEOPLE_VIEW_CONFIG_5 + PEOPLE_VIEW_MODEL,
91 |         }
92 |
93 |     def test_create_view_invoker(self, project):
94 |         # Load seed data
95 |         run_dbt(["seed"])
96 |
97 |         # Run dbt to create the view
98 |         run_dbt(["run", "--select", "view_invoker"])
99 |
100 |         # Query the system table to make sure the view DDL contains the desired clause
101 |         result = project.run_sql(
102 |             """select 1 from system.tables
103 |             where table = 'view_invoker'
104 |             and position(create_table_query, 'SQL SECURITY INVOKER') > 0""",
105 |             fetch="one",
106 |         )
107 |         assert result[0] == 1  # the view DDL includes SQL SECURITY INVOKER
108 |
109 |     def test_create_view_definer(self, project):
110 |         # Load seed data
111 |         run_dbt(["seed"])
112 |
113 |         # Run dbt to create the view
114 |         run_dbt(["run", "--select", "view_definer"])
115 |
116 |         # Query the system table to make sure the view DDL contains the desired clause
117 |         result = project.run_sql(
118 |             f"""select 1 from system.tables
119 |             where table = 'view_definer'
120 |             and position(create_table_query, 'DEFINER = {os.environ.get('DBT_CH_TEST_USER', 'default')} SQL SECURITY DEFINER') > 0""",
121 |             fetch="one",
122 |         )
123 |         assert result[0] == 1  # the view DDL includes the DEFINER clause
124 |
125 |     def test_fail_view_definer_empty(self, project):
126 |         # Load seed data
127 |         run_dbt(["seed"])
128 |
129 |         # Run dbt; creating the view is expected to fail
130 |         _, stdout = run_dbt_and_capture(
131 |             ["run", "--select", "view_definer_empty"], expect_pass=False
132 |         )
133 |
134 |         # Confirm that the stdout/console output has the error description
135 |         assert (
136 |             "Model 'model.test.view_definer_empty' does not define a required config parameter 'definer'."
137 |             in stdout
138 |         )
139 |
140 |     def test_fail_view_definer_wrong(self, project):
141 |         # Load seed data
142 |         run_dbt(["seed"])
143 |
144 |         # Run dbt; creating the view is expected to fail
145 |         _, stdout = run_dbt_and_capture(
146 |             ["run", "--select", "view_definer_wrong"], expect_pass=False
147 |         )
148 |
149 |         # Confirm that the stdout/console output has the error description
150 |         assert "Invalid config parameter `definer`. No value was provided." in stdout
151 |
152 |     def test_fail_view_sql_security(self, project):
153 |         # Load seed data
154 |         run_dbt(["seed"])
155 |
156 |         # Run dbt; creating the view is expected to fail
157 |         _, stdout = run_dbt_and_capture(["run", "--select", "view_sql_security"], expect_pass=False)
158 |
159 |         # Confirm that the stdout/console output has the error description
160 |         assert (
161 |             "Invalid config parameter `sql_security`. Got: `wrong`, but only definer | invoker allowed."
162 | in stdout 163 | ) 164 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import sys 4 | import time 5 | import timeit 6 | from pathlib import Path 7 | from subprocess import PIPE, Popen 8 | 9 | import pytest 10 | import requests 11 | from clickhouse_connect import get_client 12 | 13 | 14 | # Ensure that test users exist in environment 15 | @pytest.fixture(scope="session", autouse=True) 16 | def ch_test_users(): 17 | test_users = [ 18 | os.environ.setdefault(f'DBT_TEST_USER_{x}', f'dbt_test_user_{x}') for x in range(1, 4) 19 | ] 20 | yield test_users 21 | 22 | 23 | @pytest.fixture(scope="session", autouse=True) 24 | def ch_test_version(): 25 | yield os.environ.get('DBT_CH_TEST_CH_VERSION', 'latest') 26 | 27 | 28 | # This fixture is for customizing tests that need overrides in adapter 29 | # repos. Example in dbt.tests.adapter.basic.test_base. 30 | @pytest.fixture(scope="session") 31 | def test_config(ch_test_users, ch_test_version): 32 | compose_file = f'{Path(__file__).parent}/docker-compose.yml' 33 | test_host = os.environ.get('DBT_CH_TEST_HOST', 'localhost') 34 | test_port = int(os.environ.get('DBT_CH_TEST_PORT', 8123)) 35 | client_port = int(os.environ.get('DBT_CH_TEST_CLIENT_PORT', 0)) 36 | test_driver = os.environ.get('DBT_CH_TEST_DRIVER', '').lower() 37 | if test_driver == '': 38 | test_driver = 'native' if test_port in (10900, 9000, 9440) else 'http' 39 | test_user = os.environ.get('DBT_CH_TEST_USER', 'default') 40 | test_password = os.environ.get('DBT_CH_TEST_PASSWORD', '') 41 | test_cluster = os.environ.get('DBT_CH_TEST_CLUSTER', '') 42 | test_db_engine = os.environ.get('DBT_CH_TEST_DB_ENGINE', '') 43 | test_secure = test_port in (8443, 9440) 44 | test_cluster_mode = os.environ.get('DBT_CH_TEST_CLUSTER_MODE', '').lower() in ( 45 | '1', 46 | 'true', 47 | 'yes', 48 | ) 49 | if ch_test_version.startswith('22.3'): 50 | os.environ['DBT_CH_TEST_SETTINGS'] = '22_3' 51 | 52 | docker = os.environ.get('DBT_CH_TEST_USE_DOCKER', '').lower() in ('1', 'true', 'yes') 53 | 54 | if docker: 55 | client_port = client_port or 10723 56 | test_port = 10900 if test_driver == 'native' else client_port 57 | try: 58 | run_cmd(['docker-compose', '-f', compose_file, 'down', '-v']) 59 | sys.stderr.write('Starting docker compose') 60 | os.environ['PROJECT_ROOT'] = '.' 
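            # 'docker-compose up -d' brings up the three-node cluster defined in
            # docker-compose.yml; PROJECT_ROOT (set just above) is interpolated there
            # to locate the bind-mounted test_settings/test_config XML files. Once the
            # containers are up, we poll the HTTP port until the server responds.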
61 |             up_result = run_cmd(['docker-compose', '-f', compose_file, 'up', '-d'])
62 |             if up_result[0]:
63 |                 raise Exception(f'Failed to start docker: {up_result[2]}')
64 |             url = f"http://{test_host}:{client_port}"
65 |             wait_until_responsive(timeout=30.0, pause=0.5, check=lambda: is_responsive(url))
66 |         except Exception as e:
67 |             raise Exception(f'Failed to run docker-compose: {e}')
68 |     elif not client_port:
69 |         if test_driver == 'native':
70 |             client_port = 8443 if test_port == 9440 else 8123
71 |         else:
72 |             client_port = test_port
73 |
74 |     test_client = get_client(
75 |         host=test_host,
76 |         port=client_port,
77 |         username=test_user,
78 |         password=test_password,
79 |         secure=test_secure,
80 |     )
81 |     for dbt_user in ch_test_users:
82 |         cmd = 'CREATE USER IF NOT EXISTS %s IDENTIFIED WITH sha256_hash BY %s'
83 |         if test_cluster != '':
84 |             cmd = f'CREATE USER IF NOT EXISTS %s ON CLUSTER "{test_cluster}" IDENTIFIED WITH sha256_hash BY %s'
85 |
86 |         test_client.command(
87 |             cmd,
88 |             (dbt_user, '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8'),
89 |         )
90 |     yield {
91 |         'driver': test_driver,
92 |         'host': test_host,
93 |         'port': test_port,
94 |         'user': test_user,
95 |         'password': test_password,
96 |         'cluster': test_cluster,
97 |         'db_engine': test_db_engine,
98 |         'secure': test_secure,
99 |         'cluster_mode': test_cluster_mode,
100 |         'database': '',
101 |     }
102 |
103 |     if docker:
104 |         try:
105 |             run_cmd(['docker-compose', '-f', compose_file, 'down', '-v'])
106 |         except Exception as e:
107 |             raise Exception(f'Failed to run docker-compose while cleaning up: {e}')
108 |     else:
109 |         for test_user in ch_test_users:
110 |             test_client.command('DROP USER %s', (test_user,))
111 |
112 |
113 | # The profile dictionary, used to write out profiles.yml
114 | # dbt will supply a unique schema per test, so we do not specify 'schema' here
115 | @pytest.fixture(scope="class")
116 | def dbt_profile_target(test_config):
117 |     return {
118 |         'type': 'clickhouse',
119 |         'threads': 4,
120 |         'driver': test_config['driver'],
121 |         'host': test_config['host'],
122 |         'user': test_config['user'],
123 |         'password': test_config['password'],
124 |         'port': test_config['port'],
125 |         'cluster': test_config['cluster'],
126 |         'database_engine': test_config['db_engine'],
127 |         'cluster_mode': test_config['cluster_mode'],
128 |         'secure': test_config['secure'],
129 |         'check_exchange': False,
130 |         'use_lw_deletes': True,
131 |         'custom_settings': {
132 |             'distributed_ddl_task_timeout': 300,
133 |             'input_format_skip_unknown_fields': 1,
134 |         },
135 |     }
136 |
137 |
138 | @pytest.fixture(scope="class")
139 | def prefix():
140 |     return f"dbt_clickhouse_{random.randint(1000, 9999)}"
141 |
142 |
143 | @pytest.fixture(scope="class")
144 | def unique_schema(request, prefix) -> str:
145 |     test_file = request.module.__name__.split(".")[-1]
146 |     return f"{prefix}_{test_file}_{int(time.time() * 1000)}"
147 |
148 |
149 | def run_cmd(cmd):
150 |     with Popen(cmd, stdout=PIPE, stderr=PIPE) as popen:
151 |         stdout, stderr = popen.communicate()
152 |         return popen.returncode, stdout, stderr
153 |
154 |
155 | def is_responsive(url):
156 |     try:
157 |         response = requests.get(url)
158 |         # any non-200 status counts as "not ready yet"
159 |         return response.status_code == 200
160 |     except requests.exceptions.ConnectionError:
161 |         return False
162 |
163 |
164 | def wait_until_responsive(check, timeout, pause, clock=timeit.default_timer):
165 |     ref = clock()
166 |     now = ref
167 |     while (now - ref) < timeout:
168 |         time.sleep(pause)
169 |         if check():
170 |             return
171 |
now = clock() 172 | raise Exception("Timeout reached while waiting on service!") 173 | -------------------------------------------------------------------------------- /tests/integration/docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | 4 | x-ch-common: &ch-common 5 | volumes: 6 | - /var/lib/clickhouse 7 | - type: bind 8 | source: ${PROJECT_ROOT:-.}/test_settings_${DBT_CH_TEST_SETTINGS:-latest}.xml 9 | target: /etc/clickhouse-server/users.d/test_settings.xml 10 | - type: bind 11 | source: ${PROJECT_ROOT:-.}/test_config.xml 12 | target: /etc/clickhouse-server/config.d/test_config.xml 13 | ulimits: 14 | nofile: 15 | soft: 262144 16 | hard: 262144 17 | 18 | services: 19 | ch0: 20 | image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} 21 | environment: 22 | - SERVER_INDEX=1 23 | - SHARD_NUM=${SHARD_NUM:-1} 24 | - REPLICA_NUM=${REPLICA_NUM:-1} 25 | - CLICKHOUSE_SKIP_USER_SETUP=1 26 | ports: 27 | - "8123:8123" 28 | - "8443:8443" 29 | - "9000:9000" 30 | # for local docker tests 31 | - "10723:8123" 32 | - "10743:8443" 33 | - "10900:9000" 34 | <<: *ch-common 35 | ch1: 36 | image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} 37 | environment: 38 | - SERVER_INDEX=2 39 | - SHARD_NUM=${SHARD_NUM:-2} 40 | - REPLICA_NUM=${REPLICA_NUM:-2} 41 | - CLICKHOUSE_SKIP_USER_SETUP=1 42 | <<: *ch-common 43 | ch2: 44 | image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} 45 | environment: 46 | - SERVER_INDEX=3 47 | - SHARD_NUM=${SHARD_NUM:-3} 48 | - REPLICA_NUM=${REPLICA_NUM:-3} 49 | - CLICKHOUSE_SKIP_USER_SETUP=1 50 | <<: *ch-common 51 | 52 | networks: 53 | default: 54 | name: integration-test 55 | -------------------------------------------------------------------------------- /tests/integration/test_config.xml: -------------------------------------------------------------------------------- 1 | 2 | 8123 3 | 9000 4 | 9009 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | ch0 15 | 9000 16 | 17 | 18 | 19 | 20 | ch1 21 | 9000 22 | 23 | 24 | 25 | 26 | ch2 27 | 9000 28 | 29 | 30 | 31 | 32 | 33 | 34 | ch0 35 | 9000 36 | 37 | 38 | ch1 39 | 9000 40 | 41 | 42 | ch2 43 | 9000 44 | 45 | 46 | 47 | 48 | 49 | 9181 50 | 51 | 52 | 53 | 10000 54 | 30000 55 | 56 | 57 | 58 | 59 | 1 60 | ch0 61 | 9234 62 | 63 | 64 | 2 65 | ch1 66 | 9234 67 | 68 | 69 | 3 70 | ch2 71 | 9234 72 | 73 | 74 | 75 | 76 | 77 | ch0 78 | 9181 79 | 80 | 81 | ch1 82 | 9181 83 | 84 | 85 | ch2 86 | 9181 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /tests/integration/test_settings_22_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/integration/test_settings_latest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 6 | 7 | 8 | 9 | 1 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/unit/test_util.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from dbt.adapters.clickhouse.util import compare_versions, hide_stack_trace 4 | 5 | 6 | def test_is_before_version(): 7 | assert compare_versions('20.0.0', '21.0.0') == -1 8 | assert compare_versions('20.1.0', '21.0.0') == -1 9 | assert compare_versions('20.1.1', '21.0.0') == -1 10 | assert 
compare_versions('20.0.0', '21.0') == -1 11 | assert compare_versions('21.0.0', '21.0.0') == 0 12 | assert compare_versions('21.1.0', '21.0.0') == 1 13 | assert compare_versions('22.0.0', '21.0.0') == 1 14 | assert compare_versions('21.0.1', '21.0.0') == 1 15 | assert compare_versions('21.0.1', '21.0') == 0 16 | 17 | 18 | def test_hide_stack_trace_no_env_var(): 19 | # Test when HIDE_STACK_TRACE is not set 20 | with patch('os.getenv', return_value=''): 21 | exception = Exception("Error occurred\nStack trace details follow...") 22 | result = hide_stack_trace(exception) 23 | assert result == "Error occurred\nStack trace details follow..." 24 | 25 | 26 | def test_hide_stack_trace_env_var_set(): 27 | # Test when HIDE_STACK_TRACE is set 28 | with patch('os.getenv', return_value='1'): 29 | exception = Exception("Error occurred\nStack trace details follow...") 30 | result = hide_stack_trace(exception) 31 | assert result == "Error occurred" 32 | --------------------------------------------------------------------------------
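A closing note on the unit tests above: they pin down two easily missed behaviours. compare_versions compares only as many version components as the shorter string provides (hence compare_versions('21.0.1', '21.0') == 0), and hide_stack_trace truncates the message at the first newline only when the HIDE_STACK_TRACE environment variable is non-empty. A minimal sketch consistent with those assertions (an illustration only, not necessarily the actual code in dbt/adapters/clickhouse/util.py) could be:

```python
import os


def compare_versions(v1: str, v2: str) -> int:
    # Compare component-wise, but only over the components both strings share,
    # so '21.0.1' vs '21.0' compares as equal.
    for part1, part2 in zip(v1.split('.'), v2.split('.')):
        if int(part1) != int(part2):
            return -1 if int(part1) < int(part2) else 1
    return 0


def hide_stack_trace(ex: Exception) -> str:
    # With HIDE_STACK_TRACE unset (or empty), the full message passes through.
    if not os.getenv('HIDE_STACK_TRACE', ''):
        return str(ex)
    # Otherwise keep only the first line, dropping the stack trace that follows.
    return str(ex).split('\n')[0].strip()
```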