├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_retries_connect.py │ ├── test_retries_query.py │ ├── test_get_column_schema.py │ ├── test_external_utils.py │ ├── test_column.py │ ├── utils.py │ └── test_data_path_quoting.py ├── functional │ ├── adapter │ │ ├── indexes │ │ │ ├── __init__.py │ │ │ ├── fixtures.py │ │ │ └── test_indexes.py │ │ ├── test_concurrency.py │ │ ├── test_changing_relation_type.py │ │ ├── store_test_failures_tests │ │ │ └── test_store_test_failures.py │ │ ├── test_empty.py │ │ ├── test_simple_snapshot.py │ │ ├── test_caching.py │ │ ├── aliases │ │ │ └── test_aliases.py │ │ ├── test_persist_docs.py │ │ ├── utils │ │ │ ├── test_date_spine.py │ │ │ └── test_utils.py │ │ ├── test_unit_testing.py │ │ ├── test_community_extensions.py │ │ ├── test_hooks.py │ │ ├── simple_seed │ │ │ └── test_fast_seed.py │ │ ├── test_sources.py │ │ ├── test_rematerialize.py │ │ ├── test_attach.py │ │ ├── test_basic.py │ │ ├── test_constraints.py │ │ ├── test_ephemeral.py │ │ ├── test_write_options.py │ │ └── test_table_function.py │ ├── plugins │ │ ├── motherduck │ │ │ ├── fixtures.py │ │ │ ├── conftest.py │ │ │ ├── test_motherduck_ducklake.py │ │ │ ├── test_motherduck_write_conflict.py │ │ │ ├── test_macros.py │ │ │ └── test_motherduck_attach.py │ │ ├── test_glue.py │ │ ├── test_sqlite.py │ │ ├── test_iceberg.py │ │ ├── test_excel.py │ │ ├── test_gsheet.py │ │ ├── test_delta.py │ │ └── test_plugins.py │ └── fsspec │ │ └── test_filesystems.py ├── data │ └── excel_file.xlsx ├── create_function_plugin.py ├── bv_test_server.py └── conftest.py ├── MANIFEST.in ├── mypy.ini ├── dbt ├── include │ ├── duckdb │ │ ├── __init__.py │ │ ├── dbt_project.yml │ │ ├── macros │ │ │ ├── utils │ │ │ │ ├── any_value.sql │ │ │ │ ├── splitpart.sql │ │ │ │ ├── generate_series.sql │ │ │ │ ├── external_location.sql │ │ │ │ ├── lastday.sql │ │ │ │ ├── datediff.sql │ │ │ │ ├── listagg.sql │ │ │ │ ├── dateadd.sql │ │ │ │ └── upstream.sql │ │ │ ├── materializations │ │ │ │ ├── hooks.sql │ │ │ │ ├── incremental_strategy │ │ │ │ │ ├── merge_defaults.sql │ │ │ │ │ ├── validation_helper.sql │ │ │ │ │ ├── delete_insert.sql │ │ │ │ │ └── merge_config_validation.sql │ │ │ │ ├── table_function.sql │ │ │ │ └── table.sql │ │ │ ├── columns.sql │ │ │ ├── catalog.sql │ │ │ ├── persist_docs.sql │ │ │ ├── snapshot_helper.sql │ │ │ └── seed.sql │ │ └── sample_profiles.yml │ └── __init__.py ├── __init__.py └── adapters │ ├── __init__.py │ └── duckdb │ ├── constants.py │ ├── __version__.py │ ├── __init__.py │ ├── plugins │ ├── pd_utils.py │ ├── iceberg.py │ ├── delta.py │ ├── sqlalchemy.py │ ├── gsheet.py │ ├── motherduck.py │ └── postgres.py │ ├── environments │ ├── motherduck.py │ └── buenavista.py │ ├── utils.py │ ├── secrets.py │ ├── column.py │ ├── relation.py │ └── connections.py ├── setup.py ├── .flake8 ├── .github ├── dependabot.yml └── workflows │ ├── nightly.yml │ └── release.yml ├── pytest.ini ├── scripts └── build-dist.sh ├── dev-requirements.txt ├── .gitignore ├── .pre-commit-config.yaml ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── setup.cfg ├── CHANGELOG.md └── tox.ini /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include dbt/include *.sql *.yml *.md 2 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | mypy_path = ./third-party-stubs 3 | namespace_packages = True 4 | -------------------------------------------------------------------------------- /dbt/include/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /tests/data/excel_file.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/dbt-duckdb/HEAD/tests/data/excel_file.xlsx -------------------------------------------------------------------------------- /dbt/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/duckdb/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: dbt_duckdb 3 | version: 1.0 4 | config-version: 2 5 | 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup 3 | 4 | setup( 5 | setup_requires=["pbr"], 6 | pbr=True, 7 | ) 8 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/any_value.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__any_value(expression) -%} 2 | 3 | arbitrary({{ expression }}) 4 | 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_concurrency.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.concurrency.test_concurrency import TestConcurenncy 2 | 3 | 4 | class TestConcurrencyDuckDB(TestConcurenncy): 5 | pass 6 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/splitpart.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__split_part(string_text, delimiter_text, part_number) %} 2 | 
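{# Illustrative usage (identifier and delimiter assumed, not from the codebase):
   split_part("order_ref", "'-'", 2) renders as string_split(order_ref, '-')[ 2 ];
   DuckDB list indexing is 1-based, so part_number 1 selects the first element of the split. #}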
string_split({{ string_text }}, {{ delimiter_text }})[ {{ part_number }} ] 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = 3 | E 4 | W 5 | F 6 | ignore = 7 | W503 # makes Flake8 work like black 8 | W504 9 | E203 # makes Flake8 work like black 10 | E741 11 | E501 12 | exclude = tests 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/generate_series.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__generate_series(upper_bound) %} 2 | select 3 | generate_series as generated_number 4 | from generate_series(1, {{ upper_bound }}) 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/constants.py: -------------------------------------------------------------------------------- 1 | TEMP_SCHEMA_NAME = "temp_schema_name" 2 | DEFAULT_TEMP_SCHEMA_NAME = "dbt_temp" 3 | DUCKDB_MERGE_LOWEST_VERSION_POSSIBLE = "1.4.0-dev0" 4 | DUCKDB_BASE_INCREMENTAL_STRATEGIES = ["append", "delete+insert"] 5 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_changing_relation_type.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.relations.test_changing_relation_type import BaseChangeRelationTypeValidator 2 | 3 | 4 | class TestChangeRelationTypesDuckDB(BaseChangeRelationTypeValidator): 5 | pass -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /dbt/include/duckdb/sample_profiles.yml: -------------------------------------------------------------------------------- 1 | default: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: dev.duckdb 6 | threads: 1 7 | 8 | prod: 9 | type: duckdb 10 | path: prod.duckdb 11 | threads: 4 12 | 13 | target: dev 14 | -------------------------------------------------------------------------------- /tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import ( 2 | TestStoreTestFailures, 3 | ) 4 | 5 | 6 | class DuckDBTestStoreTestFailures(TestStoreTestFailures): 7 | pass 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:unclosed file .*:ResourceWarning 5 | testpaths = 6 | tests/functional 7 | tests/unit 8 | markers = 9 | skip_profile(profile) 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_empty.py: -------------------------------------------------------------------------------- 1 | 
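# Like most modules under tests/functional/adapter, this file reuses the shared suites from
# dbt-tests-adapter: subclassing a Base* case with a bare `pass` runs the inherited tests
# against the DuckDB target supplied by the test profile (see tests/conftest.py).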
from dbt.tests.adapter.empty.test_empty import BaseTestEmpty, BaseTestEmptyInlineSourceRef 2 | 3 | 4 | class TestDuckDBEmpty(BaseTestEmpty): 5 | pass 6 | 7 | 8 | class TestDuckDBEmptyInlineSourceRef(BaseTestEmptyInlineSourceRef): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_simple_snapshot.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.simple_snapshot.test_snapshot import ( 2 | BaseSnapshotCheck, 3 | BaseSimpleSnapshot, 4 | ) 5 | 6 | 7 | class TestSimpleSnapshotDuckDB(BaseSimpleSnapshot): 8 | pass 9 | 10 | 11 | class TestSnapshotCheckDuckDB(BaseSnapshotCheck): 12 | pass 13 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_caching.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.caching.test_caching import ( 2 | BaseCachingLowercaseModel, 3 | BaseCachingSelectedSchemaOnly, 4 | ) 5 | 6 | 7 | class TestCachingLowerCaseModelDuckDB(BaseCachingLowercaseModel): 8 | pass 9 | 10 | 11 | class TestCachingSelectedSchemaOnlyDuckDB(BaseCachingSelectedSchemaOnly): 12 | pass 13 | -------------------------------------------------------------------------------- /scripts/build-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | DBT_PATH="$( cd "$(dirname "$0")/.." ; pwd -P )" 6 | 7 | PYTHON_BIN=${PYTHON_BIN:-python} 8 | 9 | echo "$PYTHON_BIN" 10 | 11 | set -x 12 | 13 | rm -rf "$DBT_PATH"/dist 14 | rm -rf "$DBT_PATH"/build 15 | mkdir -p "$DBT_PATH"/dist 16 | 17 | cd "$DBT_PATH" 18 | $PYTHON_BIN setup.py sdist bdist_wheel 19 | 20 | set +x 21 | -------------------------------------------------------------------------------- /tests/functional/adapter/aliases/test_aliases.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.aliases.test_aliases import BaseAliases, BaseAliasErrors, BaseSameAliasDifferentSchemas 2 | 3 | class TestAliasesDuckDB(BaseAliases): 4 | pass 5 | 6 | class TestAliasesErrorDuckDB(BaseAliasErrors): 7 | pass 8 | 9 | class BaseSameALiasDifferentSchemasDuckDB(BaseSameAliasDifferentSchemas): 10 | pass 11 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/external_location.sql: -------------------------------------------------------------------------------- 1 | {%- macro external_location(relation, config) -%} 2 | {%- if config.get('options', {}).get('partition_by') is none -%} 3 | {%- set format = config.get('format', 'parquet') -%} 4 | {{- adapter.external_root() }}/{{ relation.identifier }}.{{ format }} 5 | {%- else -%} 6 | {{- adapter.external_root() }}/{{ relation.identifier }} 7 | {%- endif -%} 8 | {%- endmacro -%} 9 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/__version__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version as get_version 2 | 3 | _package_name = "dbt-duckdb" 4 | version = get_version(_package_name) 5 | # This is to get around SemVer 2 (dbt_common) vs Linux/Python compatible SemVer 3 (pbr) conflicting 6 | # See: https://docs.openstack.org/pbr/latest/user/semver.html 7 | _prerelease_tags = ["dev", "a", "b", "c"] 8 | for tag in _prerelease_tags: 9 | version = 
version.replace(f".{tag}", f"-{tag}") 10 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/lastday.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__last_day(date, datepart) -%} 2 | 3 | {%- if datepart == 'quarter' -%} 4 | -- duckdb dateadd does not support quarter interval. 5 | cast( 6 | {{dbt.dateadd('day', '-1', 7 | dbt.dateadd('month', '3', dbt.date_trunc(datepart, date)) 8 | )}} 9 | as date) 10 | {%- else -%} 11 | {{dbt.default_last_day(date, datepart)}} 12 | {%- endif -%} 13 | 14 | {%- endmacro %} 15 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.base import AdapterPlugin 2 | from dbt.adapters.duckdb.connections import DuckDBConnectionManager # noqa 3 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 4 | from dbt.adapters.duckdb.impl import DuckDBAdapter 5 | from dbt.include import duckdb 6 | 7 | Plugin = AdapterPlugin( 8 | adapter=DuckDBAdapter, # type: ignore 9 | credentials=DuckDBCredentials, 10 | include_path=duckdb.PACKAGE_PATH, 11 | ) 12 | -------------------------------------------------------------------------------- /tests/create_function_plugin.py: -------------------------------------------------------------------------------- 1 | from duckdb import DuckDBPyConnection 2 | 3 | from dbt.adapters.duckdb.plugins import BasePlugin 4 | from dbt.adapters.duckdb.utils import TargetConfig 5 | 6 | 7 | def foo() -> int: 8 | return 1729 9 | 10 | 11 | class Plugin(BasePlugin): 12 | def configure_connection(self, conn: DuckDBPyConnection): 13 | conn.create_function("foo", foo) 14 | 15 | def store(self, target_config: TargetConfig): 16 | assert target_config.config.get("key") == "value" 17 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_persist_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.persist_docs.test_persist_docs import ( 4 | BasePersistDocs, 5 | BasePersistDocsColumnMissing, 6 | BasePersistDocsCommentOnQuotedColumn, 7 | ) 8 | 9 | @pytest.mark.skip_profile("md") 10 | class TestPersistDocs(BasePersistDocs): 11 | pass 12 | 13 | 14 | @pytest.mark.skip_profile("md") 15 | class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing): 16 | pass 17 | 18 | 19 | @pytest.mark.skip_profile("md") 20 | class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn): 21 | pass 22 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/pd_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from ..utils import TargetConfig 4 | 5 | 6 | def target_to_df(target_config: TargetConfig) -> pd.DataFrame: 7 | """Load a dataframe from a target config.""" 8 | location = target_config.location 9 | if location is None: 10 | raise Exception("Target config does not have a location") 11 | if location.format == "csv": 12 | return pd.read_csv(location.path) 13 | elif location.format == "parquet": 14 | return pd.read_parquet(location.path) 15 | else: 16 | raise Exception(f"Unsupported format: {location.format}") 17 | -------------------------------------------------------------------------------- 
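A minimal sketch of how a custom plugin can combine BasePlugin with the target_to_df helper above; the module path, the out_dir config key, and the parquet destination are illustrative assumptions rather than part of the package:

# plugins/copy_to_parquet.py (hypothetical user module; it would be referenced from the
# profile's plugins configuration rather than shipped with dbt-duckdb)
from typing import Any
from typing import Dict

from dbt.adapters.duckdb.plugins import BasePlugin
from dbt.adapters.duckdb.plugins import pd_utils
from dbt.adapters.duckdb.utils import TargetConfig


class Plugin(BasePlugin):
    def initialize(self, config: Dict[str, Any]):
        # assumed plugin config key with a local default
        self._out_dir = config.get("out_dir", "/tmp/dbt_copies")

    def store(self, target_config: TargetConfig):
        # read back the csv/parquet file the external materialization wrote
        df = pd_utils.target_to_df(target_config)
        # re-write it as a parquet file named after the relation
        df.to_parquet(f"{self._out_dir}/{target_config.relation.identifier}.parquet", index=False)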
/dbt/include/duckdb/macros/materializations/hooks.sql: -------------------------------------------------------------------------------- 1 | -- this macro overrides the default run_hooks macro from dbt-adapters and drops the extra `commit;` 2 | -- because DuckDB does not begin a txn when a connection is created 3 | {% macro run_hooks(hooks, inside_transaction=True) %} 4 | {% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %} 5 | {% set rendered = render(hook.get('sql')) | trim %} 6 | {% if (rendered | length) > 0 %} 7 | {% call statement(auto_begin=inside_transaction) %} 8 | {{ rendered }} 9 | {% endcall %} 10 | {% endif %} 11 | {% endfor %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__datediff(first_date, second_date, datepart) -%} 2 | {% if datepart == 'week' %} 3 | ({{ datediff(first_date, second_date, 'day') }} // 7 + case 4 | when date_part('dow', ({{first_date}})::timestamp) <= date_part('dow', ({{second_date}})::timestamp) then 5 | case when {{first_date}} <= {{second_date}} then 0 else -1 end 6 | else 7 | case when {{first_date}} <= {{second_date}} then 1 else 0 end 8 | end) 9 | {% else %} 10 | (date_diff('{{ datepart }}', {{ first_date }}::timestamp, {{ second_date}}::timestamp )) 11 | {% endif %} 12 | {%- endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/listagg.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} 2 | {% if limit_num -%} 3 | list_aggr( 4 | (array_agg( 5 | {{ measure }} 6 | {% if order_by_clause -%} 7 | {{ order_by_clause }} 8 | {%- endif %} 9 | ))[1:{{ limit_num }}], 10 | 'string_agg', 11 | {{ delimiter_text }} 12 | ) 13 | {%- else %} 14 | string_agg( 15 | {{ measure }}, 16 | {{ delimiter_text }} 17 | {% if order_by_clause -%} 18 | {{ order_by_clause }} 19 | {%- endif %} 20 | ) 21 | {%- endif %} 22 | {%- endmacro %} 23 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/fixtures.py: -------------------------------------------------------------------------------- 1 | # 2 | # Models 3 | # 4 | 5 | models__gen_data_macro = """ 6 | select * from {{ ref("seed") }} 7 | """ 8 | 9 | # 10 | # Macros 11 | # 12 | 13 | macros__generate_database_name = """ 14 | {% macro generate_database_name(custom_database_name=none, node=none) -%} 15 | {{ target.database | trim }}_{{ var("build_env") | trim }}_{{ var("org_prefix") | trim }} 16 | {%- endmacro %} 17 | """ 18 | 19 | 20 | macros__generate_schema_name = """ 21 | {% macro generate_schema_name(custom_schema_name=none, node=none) -%} 22 | {{ target.schema | trim }}_{{ var("build_env") | trim }}_{{ var("org_prefix") | trim }} 23 | {%- endmacro %} 24 | """ 25 | 26 | # 27 | # Seeds 28 | # 29 | 30 | seeds__example_seed_csv = """a,b,c 31 | 1,2,3 32 | 4,5,6 33 | 7,8,9 34 | """ 35 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/columns.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} 2 | 3 | {% if add_columns %} 4 | {% for column in add_columns %} 5 
| {% set sql -%} 6 | alter {{ relation.type }} {{ relation }} add column 7 | {{ api.Relation.create(identifier=column.name) }} {{ column.data_type }} 8 | {%- endset -%} 9 | {% do run_query(sql) %} 10 | {% endfor %} 11 | {% endif %} 12 | 13 | {% if remove_columns %} 14 | {% for column in remove_columns %} 15 | {% set sql -%} 16 | alter {{ relation.type }} {{ relation }} drop column 17 | {{ api.Relation.create(identifier=column.name) }} 18 | {%- endset -%} 19 | {% do run_query(sql) %} 20 | {% endfor %} 21 | {% endif %} 22 | 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # install latest changes in dbt-core + dbt-tests-adapter 2 | # git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core 3 | # git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter 4 | 5 | dbt-tests-adapter==1.19.5 6 | 7 | boto3 8 | mypy-boto3-glue 9 | pandas 10 | pyarrow==22.0.0 11 | buenavista==0.5.0 12 | bumpversion 13 | flaky 14 | freezegun==1.5.5 15 | fsspec 16 | gspread 17 | ipdb 18 | mypy==1.18.2 19 | openpyxl 20 | pip-tools 21 | pre-commit 22 | psycopg2-binary 23 | psycopg[binary] 24 | pyiceberg 25 | pytest 26 | pytest-dotenv 27 | logbook<1.9 # pytest-logbook still imports logbook.compat 28 | pytest-logbook 29 | pytest-csv 30 | pytest-xdist 31 | pytest-mock 32 | testcontainers[postgres] 33 | pytz 34 | ruff 35 | sqlalchemy 36 | tox>=3.13 37 | twine 38 | wheel 39 | deltalake 40 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__dateadd(datepart, interval, from_date_or_timestamp) %} 2 | 3 | {# 4 | Support both literal and expression intervals (e.g., column references) 5 | by multiplying an INTERVAL by the value. This avoids DuckDB parser issues 6 | with "interval () " and works across versions. 
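For example (column name illustrative), dateadd('day', 3, 'shipped_at') renders as
(shipped_at + cast(3 as bigint) * interval 1 day), and with the unit mapping below
dateadd('quarter', 1, 'shipped_at') becomes
(shipped_at + (cast(1 as bigint) * 3) * interval 1 month).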
7 | 8 | Also map unsupported units: 9 | - quarter => 3 months 10 | - week => 7 days (DuckDB supports WEEK as a literal, but keep it explicit) 11 | #} 12 | 13 | {%- set unit = datepart | lower -%} 14 | {%- if unit == 'quarter' -%} 15 | ({{ from_date_or_timestamp }} + (cast({{ interval }} as bigint) * 3) * interval 1 month) 16 | {%- elif unit == 'week' -%} 17 | ({{ from_date_or_timestamp }} + (cast({{ interval }} as bigint) * 7) * interval 1 day) 18 | {%- else -%} 19 | ({{ from_date_or_timestamp }} + cast({{ interval }} as bigint) * interval 1 {{ unit }}) 20 | {%- endif -%} 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/conftest.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | import pytest 4 | import duckdb 5 | import os 6 | 7 | 8 | @pytest.fixture(scope="session") 9 | def test_database_name(): 10 | """Generate a unique database name for the entire motherduck test session""" 11 | date_str = datetime.now().strftime("%Y%m%d") 12 | random_suffix = uuid.uuid4().hex[:6] 13 | db_name = f"test_db_{date_str}_{random_suffix}" 14 | 15 | # Create the database once for all tests 16 | token = os.environ.get("MOTHERDUCK_TOKEN") or os.environ.get("TEST_MOTHERDUCK_TOKEN") 17 | if token: 18 | conn = duckdb.connect(f"md:?motherduck_token={token}") 19 | conn.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}") 20 | conn.close() 21 | 22 | yield db_name 23 | 24 | # Clean up: drop the database after all tests complete 25 | if token: 26 | conn = duckdb.connect(f"md:?motherduck_token={token}") 27 | conn.execute(f"DROP DATABASE IF EXISTS {db_name}") 28 | conn.close() 29 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/merge_defaults.sql: -------------------------------------------------------------------------------- 1 | {% macro merge_clause_defaults( 2 | merge_update_condition, 3 | merge_insert_condition, 4 | merge_update_columns=[], 5 | merge_exclude_columns=[], 6 | merge_update_set_expressions={} 7 | ) -%} 8 | 9 | {{ return({ 10 | 'when_matched_update_by_name': { 11 | 'action': 'update', 12 | 'condition': merge_update_condition, 13 | 'mode': 'by_name' 14 | }, 15 | 'when_not_matched_insert_by_name': { 16 | 'action': 'insert', 17 | 'condition': merge_insert_condition, 18 | 'mode': 'by_name' 19 | }, 20 | 'when_matched_update_explicit': { 21 | 'action': 'update', 22 | 'condition': merge_update_condition, 23 | 'mode': 'explicit', 24 | 'update': { 25 | 'include': merge_update_columns, 26 | 'exclude': merge_exclude_columns, 27 | 'set_expressions': merge_update_set_expressions 28 | } 29 | } 30 | }) }} 31 | {%- endmacro %} 32 | -------------------------------------------------------------------------------- /tests/functional/fsspec/test_filesystems.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | from dbt.adapters.duckdb.connections import DuckDBConnectionManager 4 | 5 | models_file_model_sql = """ 6 | {{ config(materialized='table') }} 7 | select * 8 | from read_csv_auto('github://data/team_ratings.csv') 9 | WHERE conf = 'West' 10 | """ 11 | 12 | 13 | @pytest.mark.skip_profile("buenavista", "md") 14 | class TestFilesystems: 15 | @pytest.fixture(scope="class") 16 | def dbt_profile_target(self, dbt_profile_target): 17 | return { 18 | "type": 
"duckdb", 19 | "path": dbt_profile_target.get("path", ":memory:"), 20 | "filesystems": [ 21 | {"fs": "github", "org": "jwills", "repo": "nba_monte_carlo"} 22 | ], 23 | } 24 | 25 | @pytest.fixture(scope="class") 26 | def models(self): 27 | return { 28 | "file_model.sql": models_file_model_sql, 29 | } 30 | 31 | def test_filesystems(self, project): 32 | DuckDBConnectionManager.close_all_connections() 33 | results = run_dbt() 34 | assert len(results) == 1 35 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/iceberg.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | import pyiceberg.catalog 5 | 6 | from . import BasePlugin 7 | from ..utils import SourceConfig 8 | 9 | 10 | class Plugin(BasePlugin): 11 | def initialize(self, config: Dict[str, Any]): 12 | if "catalog" not in config: 13 | raise Exception("'catalog' is a required argument for the iceberg plugin!") 14 | catalog = config.pop("catalog") 15 | self._catalog = pyiceberg.catalog.load_catalog(catalog, **config) 16 | 17 | def load(self, source_config: SourceConfig): 18 | table_format = source_config.get("iceberg_table", "{schema}.{identifier}") 19 | table_name = table_format.format(**source_config.as_dict()) 20 | table = self._catalog.load_table(table_name) 21 | scan_keys = { 22 | "row_filter", 23 | "selected_fields", 24 | "case_sensitive", 25 | "snapshot_id", 26 | "options", 27 | "limit", 28 | } 29 | scan_config = {k: source_config[k] for k in scan_keys if k in source_config} 30 | return table.scan(**scan_config).to_arrow() 31 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_date_spine.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | run_dbt, 5 | ) 6 | 7 | my_date_spine_model = """ 8 | {{ 9 | config( 10 | materialized = 'table', 11 | ) 12 | }} 13 | 14 | with days as ( 15 | 16 | {{ 17 | dbt_utils.date_spine( 18 | 'day', 19 | "'2024-01-01'::timestamp", 20 | dbt.dateadd('day', 1, "'2024-02-01'::timestamp"), 21 | ) 22 | }} 23 | 24 | ), 25 | 26 | final as ( 27 | select cast(date_day as date) as date_day 28 | from days 29 | ) 30 | 31 | select * from final 32 | """ 33 | 34 | class TestDateSpine: 35 | @pytest.fixture(scope="class") 36 | def models(self): 37 | return { 38 | "date_spine.sql": my_date_spine_model, 39 | } 40 | 41 | @pytest.fixture(scope="class") 42 | def packages(self): 43 | return {"packages": [{"package": "dbt-labs/dbt_utils", "version": "1.1.1"}]} 44 | 45 | def test_date_spine(self, project): 46 | 47 | # install dbt_utils 48 | run_dbt(["deps"]) 49 | # run command 50 | results = run_dbt() 51 | # run result length 52 | assert len(results) == 1 53 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/validation_helper.sql: -------------------------------------------------------------------------------- 1 | {%- macro validate_string_field(field_value, field_name, errors) -%} 2 | {%- if field_value is not none and field_value is not string -%} 3 | {%- do errors.append(field_name ~ " must be a string, found: " ~ field_value) -%} 4 | {%- endif -%} 5 | {%- endmacro -%} 6 | 7 | {%- macro validate_string_list_field(field_value, field_name, errors) -%} 8 | {%- if field_value is not none -%} 9 | {%- if field_value is not sequence or 
field_value is mapping or field_value is string -%} 10 | {%- do errors.append(field_name ~ " must be a list") -%} 11 | {%- else -%} 12 | {%- for item in field_value -%} 13 | {%- if item is not string -%} 14 | {%- do errors.append(field_name ~ " must contain only string values, found: " ~ item) -%} 15 | {%- endif -%} 16 | {%- endfor -%} 17 | {%- endif -%} 18 | {%- endif -%} 19 | {%- endmacro -%} 20 | 21 | {%- macro validate_dict_field(field_value, field_name, errors) -%} 22 | {%- if field_value is not none and field_value is not mapping -%} 23 | {%- do errors.append(field_name ~ " must be a dictionary, found: " ~ field_value) -%} 24 | {%- endif -%} 25 | {%- endmacro -%} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | logs/ 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | test.env 50 | .mypy_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | #Emacs 69 | *~ 70 | 71 | # Sublime Text 72 | *.sublime-* 73 | 74 | # Vim 75 | *.sw* 76 | 77 | .python-version 78 | 79 | .DS_Store 80 | .idea/ 81 | .vscode/ 82 | .env 83 | 84 | .venv 85 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_unit_testing.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes 4 | from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity 5 | from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput 6 | 7 | 8 | @pytest.mark.skip_profile("buenavista") 9 | class TestUnitTestingTypesDuckDB(BaseUnitTestingTypes): 10 | @pytest.fixture 11 | def data_types(self): 12 | # sql_value, yaml_value 13 | return [ 14 | ["1", "1"], 15 | ["2.0", "2.0"], 16 | ["'12345'", "12345"], 17 | ["'string'", "string"], 18 | ["true", "true"], 19 | ["DATE '2020-01-02'", "2020-01-02"], 20 | ["TIMESTAMP '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"], 21 | ["'2013-11-03 00:00:00-0'::TIMESTAMPTZ", "2013-11-03 00:00:00-0"], 22 | [ 23 | "{'Alberta':'Edmonton','Manitoba':'Winnipeg'}", 24 | "{'Alberta':'Edmonton','Manitoba':'Winnipeg'}", 25 | ], 26 | ["ARRAY['a','b','c']", "['a','b','c']"], 27 | ["ARRAY[1,2,3]", "[1, 2, 3]"], 28 | ] 29 | 30 | 31 | class TestUnitTestCaseInsensitivityDuckDB(BaseUnitTestCaseInsensivity): 32 | pass 33 | 34 | 35 | class TestUnitTestInvalidInputDuckDB(BaseUnitTestInvalidInput): 36 
| pass 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # For more on configuring pre-commit hooks (see https://pre-commit.com/) 2 | 3 | # TODO: remove global exclusion of tests when testing overhaul is complete 4 | exclude: "^tests/.*" 5 | 6 | 7 | default_language_version: 8 | python: python3.11 9 | 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v3.2.0 13 | hooks: 14 | - id: check-yaml 15 | args: [--unsafe] 16 | - id: check-json 17 | exclude: ^.devcontainer/ 18 | - id: end-of-file-fixer 19 | - id: trailing-whitespace 20 | - id: check-case-conflict 21 | - repo: https://github.com/asottile/reorder_python_imports 22 | rev: v3.9.0 23 | hooks: 24 | - id: reorder-python-imports 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | # Ruff version. 27 | rev: v0.1.7 28 | hooks: 29 | # Run the linter. 30 | - id: ruff 31 | args: 32 | - "--line-length=99" 33 | - "--fix" 34 | # Run the formatter. 35 | - id: ruff-format 36 | args: 37 | - "--line-length=99" 38 | - repo: https://github.com/pre-commit/mirrors-mypy 39 | rev: v0.782 40 | hooks: 41 | - id: mypy 42 | args: [--show-error-codes, --ignore-missing-imports] 43 | files: ^dbt/adapters/.* 44 | language: system 45 | - id: mypy 46 | alias: mypy-check 47 | stages: [manual] 48 | args: [--show-error-codes, --pretty, --ignore-missing-imports] 49 | files: ^dbt/adapters 50 | language: system 51 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/python-3/.devcontainer/base.Dockerfile 2 | 3 | # [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster 4 | ARG VARIANT="3.10-bullseye" 5 | FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} 6 | 7 | # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 8 | ARG NODE_VERSION="none" 9 | RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi 10 | 11 | # [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. 12 | # COPY requirements.txt /tmp/pip-tmp/ 13 | # RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ 14 | # && rm -rf /tmp/pip-tmp 15 | 16 | # [Optional] Uncomment this section to install additional OS packages. 17 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 18 | # && apt-get -y install --no-install-recommends 19 | 20 | # [Optional] Uncomment this line to install global node packages. 21 | # RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 22 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/delta.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | from deltalake import DeltaTable 5 | 6 | from . 
import BasePlugin 7 | from ..utils import SourceConfig 8 | 9 | 10 | class Plugin(BasePlugin): 11 | def initialize(self, config: Dict[str, Any]): 12 | pass 13 | 14 | def configure_cursor(self, cursor): 15 | pass 16 | 17 | def load(self, source_config: SourceConfig): 18 | if "delta_table_path" not in source_config: 19 | raise Exception("'delta_table_path' is a required argument for the delta table!") 20 | 21 | table_path = source_config["delta_table_path"] 22 | storage_options = source_config.get("storage_options", None) 23 | 24 | if storage_options: 25 | dt = DeltaTable(table_path, storage_options=storage_options) 26 | else: 27 | dt = DeltaTable(table_path) 28 | 29 | # delta attributes 30 | as_of_version = source_config.get("as_of_version", None) 31 | as_of_datetime = source_config.get("as_of_datetime", None) 32 | 33 | if as_of_version: 34 | dt.load_as_version(as_of_version) 35 | 36 | if as_of_datetime: 37 | dt.load_as_version(as_of_datetime) 38 | 39 | df = dt.to_pyarrow_dataset() 40 | 41 | return df 42 | 43 | def default_materialization(self): 44 | return "view" 45 | 46 | 47 | # Future 48 | # TODO add databricks catalog 49 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = dbt-duckdb 3 | author = Josh Wills 4 | author_email = joshwills+dbt@gmail.com 5 | url = https://github.com/jwills/dbt-duckdb 6 | summary = The duckdb adapter plugin for dbt (data build tool) 7 | description_file = README.md 8 | long_description_content_type = text/markdown 9 | license = Apache-2 10 | classifier = 11 | Development Status :: 5 - Production/Stable 12 | License :: OSI Approved :: Apache Software License 13 | Operating System :: Microsoft :: Windows 14 | Operating System :: MacOS :: MacOS X 15 | Operating System :: POSIX :: Linux 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | Programming Language :: Python :: 3.12 19 | Programming Language :: Python :: 3.13 20 | keywords = 21 | setup 22 | distutils 23 | 24 | [options] 25 | install_requires= 26 | dbt-common>=1,<2 27 | dbt-adapters>=1,<2 28 | duckdb>=1.0.0 29 | # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency 30 | dbt-core>=1.8.0 31 | python_requires = >=3.10 32 | include_package_data = True 33 | packages = find_namespace: 34 | 35 | [options.packages.find] 36 | include = 37 | dbt 38 | dbt.* 39 | 40 | [build-system] 41 | requires = ["setuptools >= 61.2", "pbr>=1.9"] 42 | 43 | [extras] 44 | glue = 45 | boto3 46 | mypy-boto3-glue 47 | md = 48 | duckdb==1.4.2 49 | 50 | [files] 51 | packages = 52 | dbt-duckdb 53 | -------------------------------------------------------------------------------- /tests/unit/test_retries_connect.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch 3 | 4 | from duckdb import IOException 5 | 6 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 7 | from dbt.adapters.duckdb.credentials import Retries 8 | from dbt.adapters.duckdb.environments import Environment 9 | 10 | class TestConnectRetries: 11 | 12 | @pytest.fixture 13 | def creds(self): 14 | # Create a mock credentials object 15 | return DuckDBCredentials( 16 | path="foo.db", 17 | retries=Retries(connect_attempts=2, retryable_exceptions=["IOException", "ArithmeticError"]) 18 | ) 19 | 20 | @pytest.mark.parametrize("exception", [None, 
IOException, ArithmeticError, ValueError]) 21 | def test_initialize_db(self, creds, exception): 22 | # Mocking the duckdb.connect method 23 | with patch('duckdb.connect') as mock_connect: 24 | if exception: 25 | mock_connect.side_effect = [exception, None] 26 | 27 | if exception == ValueError: 28 | with pytest.raises(ValueError) as excinfo: 29 | Environment.initialize_db(creds) 30 | else: 31 | # Call the initialize_db method 32 | Environment.initialize_db(creds) 33 | if exception in {IOException, ArithmeticError}: 34 | assert mock_connect.call_count == creds.retries.connect_attempts 35 | else: 36 | mock_connect.assert_called_once_with(creds.path, read_only=False, config={}) 37 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/catalog.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro duckdb__get_catalog(information_schema, schemas) -%} 3 | {%- call statement('catalog', fetch_result=True) -%} 4 | with relations AS ( 5 | select 6 | t.table_name 7 | , t.database_name 8 | , t.schema_name 9 | , 'BASE TABLE' as table_type 10 | , t.comment as table_comment 11 | from duckdb_tables() t 12 | WHERE t.database_name = '{{ database }}' 13 | UNION ALL 14 | SELECT v.view_name as table_name 15 | , v.database_name 16 | , v.schema_name 17 | , 'VIEW' as table_type 18 | , v.comment as table_comment 19 | from duckdb_views() v 20 | WHERE v.database_name = '{{ database }}' 21 | ) 22 | select 23 | '{{ database }}' as table_database, 24 | r.schema_name as table_schema, 25 | r.table_name, 26 | r.table_type, 27 | r.table_comment, 28 | c.column_name, 29 | c.column_index as column_index, 30 | c.data_type as column_type, 31 | c.comment as column_comment, 32 | NULL as table_owner 33 | FROM relations r JOIN duckdb_columns() c ON r.schema_name = c.schema_name AND r.table_name = c.table_name 34 | WHERE ( 35 | {%- for schema in schemas -%} 36 | upper(r.schema_name) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} 37 | {%- endfor -%} 38 | ) 39 | ORDER BY 40 | r.schema_name, 41 | r.table_name, 42 | c.column_index 43 | {%- endcall -%} 44 | {{ return(load_result('catalog').table) }} 45 | {%- endmacro %} 46 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/environments/motherduck.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from .. 
import credentials 4 | from .local import DuckDBConnectionWrapper 5 | from .local import LocalEnvironment 6 | from dbt.adapters.contracts.connection import AdapterResponse 7 | 8 | 9 | MOTHERDUCK_SAAS_MODE_QUERY = """ 10 | SELECT value FROM duckdb_settings() WHERE name = 'motherduck_saas_mode' 11 | """ 12 | 13 | 14 | class MotherDuckEnvironment(LocalEnvironment): 15 | def __init__(self, credentials: credentials.DuckDBCredentials): 16 | self._motherduck_saas_mode: Optional[bool] = None 17 | super().__init__(credentials) 18 | 19 | def motherduck_saas_mode(self, handle: DuckDBConnectionWrapper): 20 | # Return cached value 21 | if self._motherduck_saas_mode is True: 22 | return True 23 | # Get SaaS mode from DuckDB config 24 | con = handle.cursor() 25 | (motherduck_saas_mode,) = con.sql(MOTHERDUCK_SAAS_MODE_QUERY).fetchone() 26 | if str(motherduck_saas_mode).lower() in ["1", "true"]: 27 | self._motherduck_saas_mode = True 28 | return True 29 | return False 30 | 31 | def submit_python_job(self, handle, parsed_model: dict, compiled_code: str) -> AdapterResponse: 32 | # Block local file access if SaaS mode is on 33 | if self.motherduck_saas_mode(handle) is True: 34 | raise RuntimeError("Python models are disabled when MotherDuck SaaS Mode is on.") 35 | return super().submit_python_job( 36 | handle=handle, parsed_model=parsed_model, compiled_code=compiled_code 37 | ) 38 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/delete_insert.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__get_delete_insert_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%} 2 | 3 | {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} 4 | 5 | {% if unique_key %} 6 | {% if unique_key is sequence and unique_key is not string %} 7 | delete from {{target }} as DBT_INCREMENTAL_TARGET 8 | using {{ source }} 9 | where ( 10 | {% for key in unique_key %} 11 | {{ source }}.{{ key }} = DBT_INCREMENTAL_TARGET.{{ key }} 12 | {{ "and " if not loop.last}} 13 | {% endfor %} 14 | {% if incremental_predicates %} 15 | {% for predicate in incremental_predicates %} 16 | and {{ predicate }} 17 | {% endfor %} 18 | {% endif %} 19 | ); 20 | {% else %} 21 | delete from {{ target }} 22 | where ( 23 | {{ unique_key }}) in ( 24 | select ({{ unique_key }}) 25 | from {{ source }} 26 | ) 27 | {%- if incremental_predicates %} 28 | {% for predicate in incremental_predicates %} 29 | and {{ predicate }} 30 | {% endfor %} 31 | {%- endif -%}; 32 | 33 | {% endif %} 34 | {% endif %} 35 | 36 | insert into {{ target }} ({{ dest_cols_csv }}) 37 | ( 38 | select {{ dest_cols_csv }} 39 | from {{ source }} 40 | ) 41 | 42 | {%- endmacro %} 43 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/sqlalchemy.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | from sqlalchemy import text 7 | 8 | from . import BasePlugin 9 | from . 
import pd_utils 10 | from ..utils import SourceConfig 11 | from ..utils import TargetConfig 12 | 13 | 14 | class Plugin(BasePlugin): 15 | def initialize(self, plugin_config: Dict[str, Any]): 16 | self.engine = create_engine(plugin_config.pop("connection_url"), **plugin_config) 17 | 18 | def load(self, source_config: SourceConfig) -> pd.DataFrame: 19 | if "query" in source_config: 20 | query = source_config["query"] 21 | query = query.format(**source_config.as_dict()) 22 | params = source_config.get("params", {}) 23 | with self.engine.connect() as conn: 24 | return pd.read_sql_query(text(query), con=conn, params=params) 25 | else: 26 | if "table" in source_config: 27 | table = source_config["table"] 28 | else: 29 | table = source_config.table_name() 30 | with self.engine.connect() as conn: 31 | return pd.read_sql_table(table, con=conn) 32 | 33 | def store(self, target_config: TargetConfig): 34 | # first, load the data frame from the external location 35 | df = pd_utils.target_to_df(target_config) 36 | table_name = target_config.relation.identifier 37 | # then, write it to the database 38 | df.to_sql(table_name, self.engine, if_exists="replace", index=False) 39 | 40 | def __del__(self): 41 | self.engine.dispose() 42 | self.engine = None 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 1.4.0 (2023-02-14) 2 | ------------------ 3 | 4 | - Added support for DuckDB 0.7.x and the ability to `ATTACH` additional databases 5 | 6 | 1.3.2 (2022-11-16) 7 | ------------------ 8 | 9 | - Added support for DuckDB 0.6.x 10 | 11 | 1.3.1 (2022-11-07) 12 | ------------------ 13 | 14 | - Support for Python models in dbt-duckdb 15 | - Support for the `external` materialization type 16 | 17 | 1.2.3 (2022-10-24) 18 | ------------------ 19 | 20 | - Added the `settings` dictionary for configuring arbitrary settings in the DuckDB 21 | instance used during the dbt run 22 | 23 | 1.2.2 (2022-10-05) 24 | ------------------ 25 | 26 | - Fixed a small bug in the multithreading implementation 27 | 28 | 1.2.1 (2022-10-03) 29 | ------------------ 30 | 31 | - Added support for multi-threaded dbt-duckdb runs 32 | 33 | 1.2.0 (2022-09-26) 34 | ------------------ 35 | 36 | - Support for loading DuckDB extensions 37 | - Support for reading/writing from S3 via the aforementioned extensions 38 | 39 | 1.1.4 (2022-07-06) 40 | ------------------ 41 | 42 | - Enforces the single-thread limit on the dbt-duckdb profile 43 | 44 | 1.1.3 (2022-06-29) 45 | ------------------ 46 | 47 | - Fixes DuckDB 0.4.0 compatibility issue 48 | 49 | 1.1.2 (2022-06-29) 50 | ------------------ 51 | 52 | - Align with minor version of dbt-core 53 | - Constrain range of compatible duckdb versions 54 | 55 | 1.1.1 (2022-04-06) 56 | ------------------ 57 | 58 | - Fix typo in package description 59 | 60 | 1.1.0 (2022-04-06) 61 | ------------------ 62 | 63 | - Upgraded to DuckDB 0.3.2 64 | - Refactored adapter so that dbt threads > 1 work with DuckDB 65 | 66 | 1.0.0 (2022-01-10) 67 | ------------------ 68 | 69 | - Upgraded to DuckDB 0.3.1 70 | - First basically working version 71 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_glue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.adapter.basic.files import ( 4 | base_table_sql, 5 | model_base, 6 | schema_base_yml, 7 | seeds_base_csv, 8 
| ) 9 | from dbt.tests.util import ( 10 | run_dbt, 11 | ) 12 | 13 | config_materialized_glue = """ 14 | {{ config(materialized="external", glue_register=true, glue_database='db2') }} 15 | """ 16 | default_glue_sql = config_materialized_glue + model_base 17 | 18 | 19 | @pytest.mark.skip 20 | class TestGlueMaterializations: 21 | @pytest.fixture(scope="class") 22 | def models(self): 23 | return { 24 | "table_model.sql": base_table_sql, 25 | "table_default.sql": default_glue_sql, 26 | "schema.yml": schema_base_yml, 27 | } 28 | 29 | @pytest.fixture(scope="class") 30 | def seeds(self): 31 | return { 32 | "base.csv": seeds_base_csv, 33 | } 34 | 35 | @pytest.fixture(scope="class") 36 | def dbt_profile_target(self, dbt_profile_target): 37 | dbt_profile_target["external_root"] = "s3://duckdbtest/glue_test" 38 | dbt_profile_target["extensions"] = ["httpfs"] 39 | dbt_profile_target["settings"] = { 40 | "s3_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), 41 | "s3_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), 42 | "s3_region": "us-west-2", 43 | } 44 | return dbt_profile_target 45 | 46 | def test_base(self, project): 47 | # seed command 48 | results = run_dbt(["seed"]) 49 | # seed result length 50 | assert len(results) == 1 51 | 52 | # run command 53 | results = run_dbt() 54 | # run result length 55 | assert len(results) == 2 56 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/persist_docs.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | The logic in this file is adapted from dbt-postgres, since DuckDB matches 4 | the Postgres relation/column commenting model as of 0.10.1 5 | #} 6 | 7 | {# 8 | By using dollar-quoting like this, users can embed anything they want into their comments 9 | (including nested dollar-quoting), as long as they do not use this exact dollar-quoting 10 | label. It would be nice to just pick a new one but eventually you do have to give up. 
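As a worked example (description text assumed), a column description such as
    Net revenue in US$ (see the $finance$ glossary)
is emitted by duckdb_escape_comment below as
    $dbt_comment_literal_block$Net revenue in US$ (see the $finance$ glossary)$dbt_comment_literal_block$
so stray dollar signs and ad-hoc $...$ tags pass through unchanged; only a comment that
itself contains the literal label $dbt_comment_literal_block$ is rejected.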
11 | #} 12 | {% macro duckdb_escape_comment(comment) -%} 13 | {% if comment is not string %} 14 | {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %} 15 | {% endif %} 16 | {%- set magic = '$dbt_comment_literal_block$' -%} 17 | {%- if magic in comment -%} 18 | {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%} 19 | {%- endif -%} 20 | {{ magic }}{{ comment }}{{ magic }} 21 | {%- endmacro %} 22 | 23 | {% macro duckdb__alter_relation_comment(relation, comment) %} 24 | {% set escaped_comment = duckdb_escape_comment(comment) %} 25 | comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }}; 26 | {% endmacro %} 27 | 28 | 29 | {% macro duckdb__alter_column_comment(relation, column_dict) %} 30 | {% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list %} 31 | {% for column_name in column_dict if (column_name in existing_columns) %} 32 | {% set comment = column_dict[column_name]['description'] %} 33 | {% set escaped_comment = duckdb_escape_comment(comment) %} 34 | comment on column {{ relation }}.{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} is {{ escaped_comment }}; 35 | {% endfor %} 36 | {% endmacro %} 37 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_community_extensions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import ( 3 | check_relation_types, 4 | check_relations_equal, 5 | check_result_nodes_by_name, 6 | relation_from_name, 7 | run_dbt, 8 | ) 9 | 10 | @pytest.mark.skip_profile("buenavista", "nightly", reason="Cannot install community extensions for nightly release") 11 | class BaseCommunityExtensions: 12 | 13 | @pytest.fixture(scope="class") 14 | def dbt_profile_target(self, dbt_profile_target): 15 | dbt_profile_target["extensions"] = [ 16 | {"name": "quack", "repo": "community"}, 17 | ] 18 | return dbt_profile_target 19 | 20 | @pytest.fixture(scope="class") 21 | def models(self): 22 | return { 23 | "quack_model.sql": "select quack('world') as quack_world", 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def project_config_update(self): 28 | return { 29 | "name": "base", 30 | } 31 | 32 | def test_base(self, project): 33 | 34 | # run command 35 | results = run_dbt() 36 | # run result length 37 | assert len(results) == 1 38 | 39 | # names exist in result nodes 40 | check_result_nodes_by_name( 41 | results, 42 | [ 43 | "quack_model", 44 | ], 45 | ) 46 | 47 | # check relation types 48 | expected = { 49 | "quack_model": "view", 50 | } 51 | check_relation_types(project.adapter, expected) 52 | 53 | @pytest.mark.skip_profile("nightly", reason="Cannot install community extensions for nightly release") 54 | @pytest.mark.skip_profile("buenavista") 55 | class TestCommunityExtensions(BaseCommunityExtensions): 56 | pass 57 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/fixtures.py: -------------------------------------------------------------------------------- 1 | models__incremental_sql = """ 2 | {{ 3 | config( 4 | materialized = "incremental", 5 | indexes=[ 6 | {'columns': ['column_a']}, 7 | {'columns': ['column_a', 'column_b'], 'unique': True}, 8 | ] 9 | ) 10 | }} 11 | 12 | select * 13 | from ( 14 | select 1 as column_a, 2 as column_b 15 | ) t 16 | 17 | {% if is_incremental() %} 18 | where column_a > (select 
max(column_a) from {{this}}) 19 | {% endif %} 20 | 21 | """ 22 | 23 | models__table_sql = """ 24 | {{ 25 | config( 26 | materialized = "table", 27 | indexes=[ 28 | {'columns': ['column_a']}, 29 | {'columns': ['column_b']}, 30 | {'columns': ['column_a', 'column_b']}, 31 | {'columns': ['column_b', 'column_a'], 'unique': True}, 32 | {'columns': ['column_a']} 33 | ] 34 | ) 35 | }} 36 | 37 | select 1 as column_a, 2 as column_b 38 | 39 | """ 40 | 41 | snapshots__colors_sql = """ 42 | {% snapshot colors %} 43 | 44 | {{ 45 | config( 46 | target_database=database, 47 | target_schema=schema, 48 | unique_key='id', 49 | strategy='check', 50 | check_cols=['color'], 51 | indexes=[ 52 | {'columns': ['id']}, 53 | {'columns': ['id', 'color'], 'unique': True}, 54 | ] 55 | ) 56 | }} 57 | 58 | {% if var('version') == 1 %} 59 | 60 | select 1 as id, 'red' as color union all 61 | select 2 as id, 'green' as color 62 | 63 | {% else %} 64 | 65 | select 1 as id, 'blue' as color union all 66 | select 2 as id, 'green' as color 67 | 68 | {% endif %} 69 | 70 | {% endsnapshot %} 71 | 72 | """ 73 | 74 | seeds__seed_csv = """country_code,country_name 75 | US,United States 76 | CA,Canada 77 | GB,United Kingdom 78 | """ 79 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_hooks.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import pytest 3 | from dbt.tests.util import run_dbt, relation_from_name 4 | 5 | basic_model_sql = """ 6 | select range from range(3) 7 | """ 8 | 9 | test_table = f"test_table_{str(uuid.uuid1()).replace('-', '_')}" 10 | 11 | post_hook_sql = f"create table {test_table} as select 1;" 12 | 13 | 14 | class TestPostHook: 15 | """ 16 | Post hook should run inside txn 17 | """ 18 | 19 | @pytest.fixture(scope="class") 20 | def project_config_update(self): 21 | return { 22 | "name": "base", 23 | "models": {"post-hook": [{"sql": post_hook_sql}]}, 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def models(self): 28 | return { 29 | "basic_model.sql": basic_model_sql, 30 | } 31 | 32 | def test_run(self, project): 33 | run_dbt(["run"]) 34 | 35 | # check that the model was run 36 | relation = relation_from_name(project.adapter, "basic_model") 37 | result = project.run_sql( 38 | f"select count(*) as num_rows from {relation}", fetch="one" 39 | ) 40 | assert result[0] == 3 41 | 42 | # check that the post hook was run 43 | result = project.run_sql( 44 | f"select count(*) as num_rows from {test_table}", fetch="one" 45 | ) 46 | assert result[0] == 1 47 | 48 | # reset 49 | project.run_sql(f"drop table {test_table}") 50 | 51 | 52 | class TestPostHookTransactionFalse(TestPostHook): 53 | """ 54 | Post hook should run outside txn 55 | """ 56 | 57 | @pytest.fixture(scope="class") 58 | def project_config_update(self): 59 | return { 60 | "name": "base", 61 | "models": {"post-hook": [{"sql": post_hook_sql, "transaction": False}]}, 62 | } 63 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/snapshot_helper.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__snapshot_merge_sql(target, source, insert_cols) -%} 2 | {%- set insert_cols_csv = insert_cols | join(', ') -%} 3 | 4 | {%- set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%} 5 | 6 | update {{ target }} as DBT_INTERNAL_TARGET 7 | set {{ columns.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ 
columns.dbt_valid_to }} 8 | from {{ source }} as DBT_INTERNAL_SOURCE 9 | where DBT_INTERNAL_SOURCE.{{ columns.dbt_scd_id }}::text = DBT_INTERNAL_TARGET.{{ columns.dbt_scd_id }}::text 10 | and DBT_INTERNAL_SOURCE.dbt_change_type::text in ('update'::text, 'delete'::text) 11 | {% if config.get("dbt_valid_to_current") %} 12 | and (DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} = {{ config.get('dbt_valid_to_current') }} or DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} is null); 13 | {% else %} 14 | and DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} is null; 15 | {% endif %} 16 | 17 | insert into {{ target }} ({{ insert_cols_csv }}) 18 | select {% for column in insert_cols -%} 19 | DBT_INTERNAL_SOURCE.{{ column }} {%- if not loop.last %}, {%- endif %} 20 | {%- endfor %} 21 | from {{ source }} as DBT_INTERNAL_SOURCE 22 | where DBT_INTERNAL_SOURCE.dbt_change_type::text = 'insert'::text; 23 | 24 | {% endmacro %} 25 | 26 | {% macro build_snapshot_staging_table(strategy, sql, target_relation) %} 27 | {% set temp_relation = make_temp_relation(target_relation) %} 28 | 29 | {% set select = snapshot_staging_table(strategy, sql, target_relation) %} 30 | 31 | {% call statement('build_snapshot_staging_relation') %} 32 | {{ create_table_as(False, temp_relation, select) }} 33 | {% endcall %} 34 | 35 | {% do return(temp_relation) %} 36 | {% endmacro %} 37 | 38 | {% macro duckdb__post_snapshot(staging_relation) %} 39 | {% do return(drop_relation(staging_relation)) %} 40 | {% endmacro %} 41 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/upstream.sql: -------------------------------------------------------------------------------- 1 | {%- macro register_upstream_external_models() -%} 2 | {% if execute %} 3 | {% set upstream_nodes = {} %} 4 | {% set upstream_schemas = {} %} 5 | {% for node in selected_resources %} 6 | {% if node not in graph['nodes'] %}{% continue %}{% endif %} 7 | {% for upstream_node in graph['nodes'][node]['depends_on']['nodes'] %} 8 | {% if upstream_node not in upstream_nodes and upstream_node not in selected_resources %} 9 | {% do upstream_nodes.update({upstream_node: None}) %} 10 | {% set upstream = graph['nodes'].get(upstream_node) %} 11 | {% if upstream 12 | and upstream.resource_type in ('model', 'seed') 13 | and upstream.config.materialized=='external' 14 | %} 15 | {%- set upstream_rel = api.Relation.create( 16 | database=upstream['database'], 17 | schema=upstream['schema'], 18 | identifier=upstream['alias'] 19 | ) -%} 20 | {%- set location = upstream.config.get('location', external_location(upstream_rel, upstream.config)) -%} 21 | {%- set rendered_options = render_write_options(upstream.config) -%} 22 | {%- set upstream_location = adapter.external_read_location(location, rendered_options) -%} 23 | {% if upstream_rel.schema not in upstream_schemas %} 24 | {% call statement('main', language='sql') -%} 25 | create schema if not exists {{ upstream_rel.without_identifier() }} 26 | {%- endcall %} 27 | {% do upstream_schemas.update({upstream_rel.schema: None}) %} 28 | {% endif %} 29 | {% call statement('main', language='sql') -%} 30 | create or replace view {{ upstream_rel }} as ( 31 | select * from '{{ upstream_location }}' 32 | ); 33 | {%- endcall %} 34 | {%- endif %} 35 | {% endif %} 36 | {% endfor %} 37 | {% endfor %} 38 | {% if upstream_schemas %} 39 | {% do adapter.commit() %} 40 | {% endif %} 41 | {% endif %} 42 | {%- endmacro -%} 43 | -------------------------------------------------------------------------------- 
/tests/functional/plugins/test_sqlite.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlite3 3 | from pathlib import Path 4 | from dbt.tests.util import ( 5 | run_dbt, 6 | ) 7 | 8 | model_sql = """ 9 | {{ config(materialized='incremental', database='satest') }} 10 | select * from satest.tt1 11 | """ 12 | 13 | 14 | class TestSQLitePlugin: 15 | 16 | @pytest.fixture(scope="class") 17 | def sqlite_test_db(self): 18 | path = '/tmp/satest.db' 19 | Path(path).unlink(missing_ok=True) 20 | db = sqlite3.connect(path) 21 | cursor = db.cursor() 22 | cursor.execute("CREATE TABLE tt1 (id int, name text)") 23 | cursor.execute("INSERT INTO tt1 VALUES (1, 'John Doe')") 24 | cursor.execute("INSERT INTO tt1 VALUES (2, 'Jane Smith')") 25 | cursor.execute("CREATE TABLE test_table2 (a int, b int, c int)") 26 | cursor.execute("INSERT INTO test_table2 VALUES (1, 2, 3), (4, 5, 6)") 27 | cursor.close() 28 | db.commit() 29 | db.close() 30 | 31 | yield path 32 | 33 | @pytest.fixture(scope="class") 34 | def profiles_config_update(self, dbt_profile_target, sqlite_test_db): 35 | return { 36 | "test": { 37 | "outputs": { 38 | "dev": { 39 | "type": "duckdb", 40 | "path": dbt_profile_target.get("path", ":memory:"), 41 | "attach": [ 42 | {'path': sqlite_test_db} 43 | ] 44 | } 45 | }, 46 | "target": "dev", 47 | } 48 | } 49 | 50 | @pytest.fixture(scope="class") 51 | def models(self, test_data_path): 52 | return { 53 | "read_write.sql": model_sql, 54 | 55 | } 56 | 57 | def test_sqlite_plugin(self, project): 58 | results = run_dbt() 59 | assert len(results) == 1 60 | 61 | res = project.run_sql("SELECT COUNT(1) FROM satest.read_write", fetch="one") 62 | assert res[0] == 2 63 | 64 | 65 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/seed.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro duckdb__get_binding_char() %} 3 | {{ return(adapter.get_binding_char()) }} 4 | {% endmacro %} 5 | 6 | {% macro duckdb__get_batch_size() %} 7 | {{ return(10000) }} 8 | {% endmacro %} 9 | 10 | {% macro duckdb__load_csv_rows(model, agate_table) %} 11 | {% if config.get('fast', true) %} 12 | {% set seed_file_path = adapter.get_seed_file_path(model) %} 13 | {% set delimiter = config.get('delimiter', ',') %} 14 | {% set sql %} 15 | COPY {{ this.render() }} FROM '{{ seed_file_path }}' (FORMAT CSV, HEADER TRUE, DELIMITER '{{ delimiter }}') 16 | {% endset %} 17 | {% do adapter.add_query(sql, abridge_sql_log=True) %} 18 | {{ return(sql) }} 19 | {% endif %} 20 | 21 | {% set batch_size = get_batch_size() %} 22 | {% set agate_table = adapter.convert_datetimes_to_strs(agate_table) %} 23 | {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} 24 | {% set bindings = [] %} 25 | 26 | {% set statements = [] %} 27 | 28 | {% for chunk in agate_table.rows | batch(batch_size) %} 29 | {% set bindings = [] %} 30 | 31 | {% for row in chunk %} 32 | {% do bindings.extend(row) %} 33 | {% endfor %} 34 | 35 | {% set sql %} 36 | insert into {{ this.render() }} ({{ cols_sql }}) values 37 | {% for row in chunk -%} 38 | ({%- for column in agate_table.column_names -%} 39 | {{ get_binding_char() }} 40 | {%- if not loop.last%},{%- endif %} 41 | {%- endfor -%}) 42 | {%- if not loop.last%},{%- endif %} 43 | {%- endfor %} 44 | {% endset %} 45 | 46 | {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} 47 | 48 | {% if loop.index0 == 0 %} 49 | {% do 
statements.append(sql) %} 50 | {% endif %} 51 | {% endfor %} 52 | 53 | {# Return SQL so we can render it out into the compiled files #} 54 | {{ return(statements[0]) }} 55 | {% endmacro %} 56 | -------------------------------------------------------------------------------- /tests/bv_test_server.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import tempfile 4 | 5 | from buenavista.backends.duckdb import DuckDBConnection 6 | from buenavista.core import BVType, Extension, Session, QueryResult, SimpleQueryResult 7 | from buenavista.postgres import BuenaVistaServer 8 | 9 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 10 | from dbt.adapters.duckdb.environments import Environment 11 | 12 | 13 | class TestPythonRunner(Extension): 14 | def type(self) -> str: 15 | return "dbt_python_job" 16 | 17 | def apply(self, params: dict, handle: Session) -> QueryResult: 18 | mod_file = tempfile.NamedTemporaryFile(suffix=".py", delete=False) 19 | mod_file.write(params["module_definition"].lstrip().encode("utf-8")) 20 | mod_file.close() 21 | try: 22 | spec = importlib.util.spec_from_file_location( 23 | params["module_name"], 24 | mod_file.name, 25 | ) 26 | if not spec: 27 | raise Exception("Failed to load python model as module") 28 | module = importlib.util.module_from_spec(spec) 29 | if spec.loader: 30 | spec.loader.exec_module(module) 31 | else: 32 | raise Exception("Module spec did not include a loader") 33 | # Do the actual work to run the code here 34 | cursor = handle.cursor() 35 | dbt = module.dbtObj(handle.load_df_function) 36 | df = module.model(dbt, cursor) 37 | module.materialize(df, cursor) 38 | return SimpleQueryResult("msg", "Success", BVType.TEXT) 39 | finally: 40 | os.unlink(mod_file.name) 41 | 42 | 43 | def create(): 44 | config = {"path": ":memory:", "type": "duckdb"} 45 | creds = DuckDBCredentials.from_dict(config) 46 | db = Environment.initialize_db(creds) 47 | conn = DuckDBConnection(db) 48 | server = BuenaVistaServer( 49 | ("localhost", 5433), conn, extensions=[TestPythonRunner()] 50 | ) 51 | return server 52 | 53 | 54 | if __name__ == "__main__": 55 | server = create() 56 | server.serve_forever() 57 | -------------------------------------------------------------------------------- /tests/functional/adapter/simple_seed/test_fast_seed.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.simple_seed.test_seed import SeedTestBase 4 | from dbt.tests.adapter.simple_seed.test_seed import SeedUniqueDelimiterTestBase 5 | from dbt.tests.util import ( 6 | run_dbt, 7 | ) 8 | 9 | class TestSeedConfigFast(SeedTestBase): 10 | @pytest.fixture(scope="class") 11 | def project_config_update(self): 12 | return { 13 | "seeds": {"quote_columns": False, "fast": True} 14 | } 15 | 16 | def test_simple_seed_fast(self, project): 17 | self._build_relations_for_test(project) 18 | self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) 19 | 20 | 21 | class TestSeedWithUniqueDelimiter(SeedUniqueDelimiterTestBase): 22 | def test_seed_with_unique_delimiter(self, project): 23 | """Testing correct run of seeds with a unique delimiter (pipe in this case)""" 24 | self._build_relations_for_test(project) 25 | self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) 26 | 27 | 28 | class TestSeedWithWrongDelimiter(SeedUniqueDelimiterTestBase): 29 | @pytest.fixture(scope="class") 30 | 
def project_config_update(self): 31 | return { 32 | "seeds": {"quote_columns": False, "delimiter": ";"}, 33 | } 34 | 35 | def test_seed_with_wrong_delimiter(self, project): 36 | """Testing failure of running dbt seed with a wrongly configured delimiter""" 37 | seed_result = run_dbt(["seed"], expect_pass=False) 38 | assert "syntax error" in seed_result.results[0].message.lower() 39 | 40 | 41 | class TestSeedWithEmptyDelimiter(SeedUniqueDelimiterTestBase): 42 | @pytest.fixture(scope="class") 43 | def project_config_update(self): 44 | return { 45 | "seeds": {"quote_columns": False, "delimiter": ""}, 46 | } 47 | 48 | def test_seed_with_empty_delimiter(self, project): 49 | """Testing failure of running dbt seed with an empty configured delimiter value""" 50 | seed_result = run_dbt(["seed"], expect_pass=False) 51 | assert "compilation error" in seed_result.results[0].message.lower() 52 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_iceberg.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | check_relations_equal, 5 | run_dbt, 6 | ) 7 | 8 | sources_schema_yml = """ 9 | version: 2 10 | sources: 11 | - name: iceberg_source 12 | schema: main 13 | config: 14 | plugin: iceberg 15 | iceberg_table: "examples.{identifier}" 16 | tables: 17 | - name: nyc_taxi_locations 18 | """ 19 | 20 | models_source_model1_sql = """ 21 | select * from {{ source('iceberg_source', 'nyc_taxi_locations') }} 22 | """ 23 | 24 | 25 | # Skipping this b/c it requires using my (@jwills) personal creds 26 | # when testing it locally and also b/c I think there is something 27 | # wrong with profiles_config_update since it can't be used in multiple 28 | # tests in the same pytest session 29 | @pytest.mark.skip 30 | class TestIcebergPlugin: 31 | @pytest.fixture(scope="class") 32 | def profiles_config_update(self, dbt_profile_target): 33 | config = {"catalog": "default"} 34 | if "path" not in dbt_profile_target: 35 | return {} 36 | return { 37 | "test": { 38 | "outputs": { 39 | "dev": { 40 | "type": "duckdb", 41 | "path": dbt_profile_target["path"], 42 | "plugins": [ 43 | {"module": "iceberg", "config": config} 44 | ], 45 | } 46 | }, 47 | "target": "dev", 48 | } 49 | } 50 | 51 | @pytest.fixture(scope="class") 52 | def models(self): 53 | return { 54 | "schema.yml": sources_schema_yml, 55 | "source_model1.sql": models_source_model1_sql, 56 | } 57 | 58 | def test_iceberg_plugin(self, project): 59 | results = run_dbt() 60 | assert len(results) == 1 61 | 62 | res = project.run_sql("SELECT COUNT(1) FROM nyc_taxi_locations", fetch="one") 63 | assert res[0] == 265 64 | 65 | check_relations_equal( 66 | project.adapter, 67 | [ 68 | "nyc_taxi_locations", 69 | "source_model1", 70 | ], 71 | ) -------------------------------------------------------------------------------- /tests/functional/plugins/test_excel.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pytest 3 | 4 | from dbt.tests.util import ( 5 | check_relations_equal, 6 | run_dbt, 7 | ) 8 | 9 | schema_yml = """ 10 | version: 2 11 | sources: 12 | - name: excel_source 13 | schema: main 14 | meta: 15 | plugin: excel 16 | tables: 17 | - name: excel_file 18 | description: "An excel file" 19 | meta: 20 | external_location: "{test_data_path}/excel_file.xlsx" 21 | """ 22 | 23 | plugins = [ 24 | { 25 | "module": "excel", 26 | "config": { 27 | "output": { 28 | "engine": 
"openpyxl", 29 | "file": "/tmp/excel_file_out.xlsx", 30 | "lazy_close": False 31 | } 32 | } 33 | }, 34 | ] 35 | 36 | model_sql = """ 37 | {{ config(materialized='external', plugin='excel') }} 38 | select * from {{ source('excel_source', 'excel_file') }} 39 | """ 40 | 41 | 42 | class TestExcelPlugin: 43 | @pytest.fixture(scope="class") 44 | def profiles_config_update(self, dbt_profile_target): 45 | return { 46 | "test": { 47 | "outputs": { 48 | "dev": { 49 | "type": "duckdb", 50 | "path": dbt_profile_target.get("path", ":memory:"), 51 | "plugins": plugins, 52 | } 53 | }, 54 | "target": "dev", 55 | } 56 | } 57 | 58 | @pytest.fixture(scope="class") 59 | def models(self, test_data_path): 60 | return { 61 | "schema_excel.yml": schema_yml.format(test_data_path=test_data_path), 62 | "excel_read_write.sql": model_sql, 63 | } 64 | 65 | def test_excel_plugin(self, project): 66 | results = run_dbt() 67 | assert len(results) == 1 68 | 69 | res = project.run_sql("SELECT COUNT(1) FROM excel_file", fetch="one") 70 | assert res[0] == 9 71 | 72 | df = pandas.read_excel('/tmp/excel_file_out.xlsx') 73 | assert df.shape[0] == 9 74 | assert df['First Name'].iloc[0] == 'Dulce' 75 | 76 | check_relations_equal( 77 | project.adapter, 78 | [ 79 | "excel_file", 80 | "excel_read_write", 81 | ], 82 | ) 83 | 84 | 85 | -------------------------------------------------------------------------------- /tests/unit/test_retries_query.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock 3 | from unittest.mock import patch 4 | 5 | from duckdb import IOException 6 | 7 | from dbt.adapters.duckdb.credentials import Retries 8 | from dbt.adapters.duckdb.environments import RetryableCursor 9 | 10 | class TestRetryableCursor: 11 | 12 | @pytest.fixture 13 | def mock_cursor(self): 14 | return MagicMock() 15 | 16 | @pytest.fixture 17 | def mock_retries(self): 18 | return Retries(query_attempts=3) 19 | 20 | @pytest.fixture 21 | def retry_cursor(self, mock_cursor, mock_retries): 22 | return RetryableCursor( 23 | mock_cursor, 24 | mock_retries.query_attempts, 25 | mock_retries.retryable_exceptions) 26 | 27 | def test_successful_execute(self, mock_cursor, retry_cursor): 28 | """ Test that execute successfully runs the SQL query. """ 29 | sql_query = "SELECT * FROM table" 30 | retry_cursor.execute(sql_query) 31 | mock_cursor.execute.assert_called_once_with(sql_query) 32 | 33 | def test_retry_on_failure(self, mock_cursor, retry_cursor): 34 | """ Test that execute retries the SQL query on failure. """ 35 | mock_cursor.execute.side_effect = [IOException, None] 36 | sql_query = "SELECT * FROM table" 37 | retry_cursor.execute(sql_query) 38 | assert mock_cursor.execute.call_count == 2 39 | 40 | def test_no_retry_on_non_retryable_exception(self, mock_cursor, retry_cursor): 41 | """ Test that a non-retryable exception is not retried. """ 42 | mock_cursor.execute.side_effect = ValueError 43 | sql_query = "SELECT * FROM table" 44 | with pytest.raises(ValueError): 45 | retry_cursor.execute(sql_query) 46 | mock_cursor.execute.assert_called_once_with(sql_query) 47 | 48 | def test_exponential_backoff(self, mock_cursor, retry_cursor): 49 | """ Test that exponential backoff is applied between retries. 
""" 50 | mock_cursor.execute.side_effect = [IOException, IOException, None] 51 | sql_query = "SELECT * FROM table" 52 | 53 | with patch("time.sleep") as mock_sleep: 54 | retry_cursor.execute(sql_query) 55 | assert mock_sleep.call_count == 2 56 | mock_sleep.assert_any_call(1) 57 | mock_sleep.assert_any_call(2) 58 | -------------------------------------------------------------------------------- /tests/unit/test_get_column_schema.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from tests.unit.utils import config_from_parts_or_dicts 8 | 9 | 10 | class TestDuckDBAdapterGetColumnSchemaFromQuery(unittest.TestCase): 11 | def setUp(self): 12 | set_from_args(Namespace(STRICT_MODE=True), {}) 13 | 14 | profile_cfg = { 15 | "outputs": { 16 | "test": { 17 | "type": "duckdb", 18 | "path": ":memory:", 19 | } 20 | }, 21 | "target": "test", 22 | } 23 | 24 | project_cfg = { 25 | "name": "X", 26 | "version": "0.1", 27 | "profile": "test", 28 | "project-root": "/tmp/dbt/does-not-exist", 29 | "quoting": { 30 | "identifier": False, 31 | "schema": True, 32 | }, 33 | "config-version": 2, 34 | } 35 | 36 | self.config = config_from_parts_or_dicts(project_cfg, profile_cfg, cli_vars={}) 37 | self.mock_mp_context = mock.MagicMock() 38 | self._adapter = None 39 | 40 | @property 41 | def adapter(self): 42 | if self._adapter is None: 43 | self._adapter = DuckDBAdapter(self.config, self.mock_mp_context) 44 | return self._adapter 45 | 46 | def test_get_column_schema_from_query_with_struct(self): 47 | """Test get_column_schema_from_query flattens struct columns.""" 48 | mock_cursor = mock.MagicMock() 49 | mock_cursor.fetchall.return_value = [ 50 | ("id", "INTEGER"), 51 | ("user_data", "STRUCT(name VARCHAR, age INTEGER)") 52 | ] 53 | 54 | with mock.patch.object(self.adapter.connections, 'add_select_query', return_value=(None, mock_cursor)): 55 | result = self.adapter.get_column_schema_from_query("SELECT * FROM test_table") 56 | 57 | # Verify result contains flattened columns (1 simple + 2 from struct) 58 | self.assertEqual(len(result), 3) 59 | self.assertEqual(result[0].column, "id") 60 | self.assertEqual(result[1].column, "user_data.name") 61 | self.assertEqual(result[2].column, "user_data.age") 62 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = py{310,311,312,313} 4 | 5 | [testenv:{unit,py310,py311,py312,py313,py}] 6 | description = unit testing 7 | skip_install = True 8 | passenv = * 9 | commands = {envpython} -m pytest {posargs} tests/unit 10 | deps = 11 | -rdev-requirements.txt 12 | -e. 13 | 14 | [testenv:{functional,py310,py311,py312,py313,py}] 15 | description = adapter functional testing 16 | skip_install = True 17 | passenv = * 18 | commands = {envpython} -m pytest {posargs} tests/functional/adapter 19 | deps = 20 | -rdev-requirements.txt 21 | -e. 22 | 23 | [testenv:{filebased,py310,py311,py312,py313,py}] 24 | description = adapter functional testing using file-based DBs 25 | skip_install = True 26 | passenv = * 27 | commands = {envpython} -m pytest --profile=file {posargs} tests/functional/adapter 28 | deps = 29 | -rdev-requirements.txt 30 | -e. 
31 | 32 | [testenv:{buenavista,py310}] 33 | description = adapter functional testing using a Buena Vista server 34 | skip_install = True 35 | passenv = * 36 | commands = {envpython} -m pytest --profile=buenavista {posargs} tests/functional/adapter 37 | deps = 38 | -rdev-requirements.txt 39 | -e. 40 | 41 | [testenv:{md,py311}] 42 | description = adapter function testing using MotherDuck 43 | skip_install = True 44 | passenv = * 45 | commands = {envpython} -m pytest --profile=md --maxfail=2 {posargs} tests/functional/plugins/motherduck tests/functional/adapter 46 | deps = 47 | duckdb==1.4.2 48 | -rdev-requirements.txt 49 | -e.[md] 50 | 51 | [testenv:{fsspec,py310,py311,py312,py313,py}] 52 | description = adapter fsspec testing 53 | skip_install = True 54 | passenv = * 55 | commands = {envpython} -m pytest {posargs} tests/functional/fsspec 56 | deps = 57 | -rdev-requirements.txt 58 | -e. 59 | 60 | [testenv:{plugins,py310,py311,py312,py313,py}] 61 | description = adapter plugin testing 62 | skip_install = True 63 | passenv = * 64 | commands = {envpython} -m pytest {posargs} --profile=file tests/functional/plugins 65 | deps = 66 | duckdb==1.4.2 67 | -rdev-requirements.txt 68 | -e. 69 | 70 | [testenv:{nightly,py310,py311,py312,py313,py}] 71 | description = duckdb nightly release testing 72 | skip_install = True 73 | passenv = * 74 | commands = 75 | {envpython} -m pip install --upgrade --pre duckdb 76 | {envpython} -m pip show duckdb 77 | {envpython} -m pytest {posargs} --profile=nightly tests/unit tests/functional/adapter 78 | deps = 79 | -rdev-requirements.txt 80 | -e. 81 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_sources.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dbt.tests.util import run_dbt 5 | 6 | sources_schema_yml = """version: 2 7 | sources: 8 | - name: external_source 9 | config: 10 | external_location: "/tmp/{name}_{extra}.csv" 11 | tables: 12 | - name: seeds_source 13 | description: "A source table" 14 | config: 15 | extra: 'something' 16 | columns: 17 | - name: id 18 | description: "An id" 19 | tests: 20 | - unique 21 | - not_null 22 | - name: seeds_ost 23 | identifier: "seeds_other_source_table" 24 | config: 25 | external_location: "read_csv_auto('/tmp/%(identifier)s.csv')" 26 | formatter: oldstyle 27 | - name: seeds_other_source_table 28 | config: 29 | external_location: "read_csv_auto('/tmp/${name}.csv')" 30 | formatter: template 31 | """ 32 | 33 | models_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 34 | """ 35 | 36 | models_multi_source_model_sql = """select s.* from {{ source('external_source', 'seeds_source') }} s 37 | inner join {{ source('external_source', 'seeds_ost') }} oldstyle USING (id) 38 | inner join {{ source('external_source', 'seeds_other_source_table') }} tmpl USING (id) 39 | """ 40 | 41 | 42 | class TestExternalSources: 43 | @pytest.fixture(scope="class") 44 | def models(self): 45 | return { 46 | "schema.yml": sources_schema_yml, 47 | "source_model.sql": models_source_model_sql, 48 | "multi_source_model.sql": models_multi_source_model_sql, 49 | } 50 | 51 | @pytest.fixture(scope="class") 52 | def seeds_source_file(self): 53 | with open("/tmp/seeds_source_something.csv", "w") as f: 54 | f.write("id,a,b\n1,2,3\n4,5,6\n7,8,9") 55 | yield 56 | os.unlink("/tmp/seeds_source_something.csv") 57 | 58 | @pytest.fixture(scope="class") 59 | def ost_file(self): 60 | with 
open("/tmp/seeds_other_source_table.csv", "w") as f: 61 | f.write("id,c,d\n1,2,3\n4,5,6\n7,8,9") 62 | yield 63 | os.unlink("/tmp/seeds_other_source_table.csv") 64 | 65 | def test_external_sources(self, seeds_source_file, ost_file, project): 66 | results = run_dbt(["run"]) 67 | assert len(results) == 2 68 | test_results = run_dbt(["test"]) 69 | assert len(test_results) == 2 70 | -------------------------------------------------------------------------------- /tests/unit/test_external_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from tests.unit.utils import config_from_parts_or_dicts 8 | 9 | class TestExternalUtils(unittest.TestCase): 10 | def setUp(self): 11 | set_from_args(Namespace(STRICT_MODE=True), {}) 12 | 13 | profile_cfg = { 14 | "outputs": { 15 | "test": { 16 | "type": "duckdb", 17 | "path": ":memory:", 18 | } 19 | }, 20 | "target": "test", 21 | } 22 | 23 | project_cfg = { 24 | "name": "X", 25 | "version": "0.1", 26 | "profile": "test", 27 | "project-root": "/tmp/dbt/does-not-exist", 28 | "config-version": 2, 29 | } 30 | 31 | self.config = config_from_parts_or_dicts(project_cfg, profile_cfg, cli_vars={}) 32 | self._adapter = None 33 | 34 | @property 35 | def adapter(self): 36 | self.mock_mp_context = mock.MagicMock() 37 | if self._adapter is None: 38 | self._adapter = DuckDBAdapter(self.config, self.mock_mp_context) 39 | return self._adapter 40 | 41 | def test_external_write_options(self): 42 | data = [ 43 | ("/tmp/test.csv", {}, "format csv, header 1"), 44 | ("./foo.parquet", {"codec": "zstd"}, "codec zstd, format parquet"), 45 | ("bar", {"delimiter": "|", "header": "0"}, "delimiter '|', header 0, format csv"), 46 | ("a.parquet", {"partition_by": "ds"}, "partition_by ds, format parquet"), 47 | ("b.csv", {"partition_by": "ds,category"}, "partition_by (ds,category), format csv, header 1"), 48 | ("/path/to/c.csv", {"null": "\\N"}, "null '\\N', format csv, header 1") 49 | ] 50 | 51 | for (loc, opts, expected) in data: 52 | assert expected == self.adapter.external_write_options(loc, opts) 53 | 54 | 55 | def test_external_read_location(self): 56 | data = [ 57 | ("bar", {"format": "csv", "delimiter": "|", "header": "0"}, "bar"), 58 | ("/tmp/a", {"partition_by": "ds", "format": "parquet"}, "/tmp/a/*/*.parquet"), 59 | ("b", {"partition_by": "ds,category"}, "b/*/*/*.parquet"), 60 | ] 61 | for (loc, opts, expected) in data: 62 | assert expected == self.adapter.external_read_location(loc, opts) -------------------------------------------------------------------------------- /tests/functional/plugins/test_gsheet.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | check_relations_equal, 5 | run_dbt, 6 | ) 7 | 8 | sources_schema_yml = """ 9 | version: 2 10 | sources: 11 | - name: gsheet_source 12 | schema: main 13 | meta: 14 | plugin: gsheet 15 | title: "Josh's Test Spreadsheet" 16 | tables: 17 | - name: gsheet1 18 | description: "My first sheet" 19 | - name: gsheet2 20 | description: "The second sheet in the doc" 21 | meta: 22 | worksheet: "TwoSheet" 23 | """ 24 | 25 | models_source_model1_sql = """ 26 | select * from {{ source('gsheet_source', 'gsheet1') }} 27 | """ 28 | models_source_model2_sql = """ 29 | select * from {{ source('gsheet_source', 'gsheet2') }} 30 | """ 31 | 32 | 
33 | # Skipping this b/c it requires using my (@jwills) personal creds 34 | # when testing it locally and also b/c I think there is something 35 | # wrong with profiles_config_update since it can't be used in multiple 36 | # tests in the same pytest session 37 | @pytest.mark.skip 38 | class TestGSheetPlugin: 39 | @pytest.fixture(scope="class") 40 | def profiles_config_update(self, dbt_profile_target): 41 | config = {"method": "oauth"} 42 | if "path" not in dbt_profile_target: 43 | return {} 44 | return { 45 | "test": { 46 | "outputs": { 47 | "dev": { 48 | "type": "duckdb", 49 | "path": dbt_profile_target["path"], 50 | "plugins": [ 51 | {"module": "gsheet", "config": config} 52 | ], 53 | } 54 | }, 55 | "target": "dev", 56 | } 57 | } 58 | 59 | @pytest.fixture(scope="class") 60 | def models(self, test_data_path): 61 | return { 62 | "schema.yml": sources_schema_yml.format(test_data_path=test_data_path), 63 | "source_model1.sql": models_source_model1_sql, 64 | "source_model2.sql": models_source_model2_sql, 65 | } 66 | 67 | def test_gsheet_plugin(self, project): 68 | results = run_dbt() 69 | assert len(results) == 2 70 | 71 | check_relations_equal( 72 | project.adapter, 73 | [ 74 | "gsheet1", 75 | "source_model1", 76 | ], 77 | ) 78 | 79 | check_relations_equal( 80 | project.adapter, 81 | [ 82 | "gsheet2", 83 | "source_model2", 84 | ], 85 | ) 86 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/table_function.sql: -------------------------------------------------------------------------------- 1 | {% materialization table_function, adapter='duckdb' %} 2 | -- This materialization uses DuckDB's Table Function / Table Macro feature to provide parameterized views. 3 | -- Why use this? 4 | -- Late binding of functions means that the underlying table can change (have new columns added), and 5 | -- the function does not need to be recreated. (With a view, the create view statement would need to be re-run.) 6 | -- This allows for skipping parts of the dbt DAG, even if the underlying table changed. 7 | -- Parameters can force filter pushdown. 8 | -- Functions can provide advanced features like dynamic SQL (the query and query_table functions). 9 | 10 | -- For usage examples, see the tests at /dbt-duckdb/tests/functional/adapter/test_table_function.py 11 | -- (Don't forget parentheses when you pull from a table_function!) 12 | 13 | -- Using Redshift as an example: 14 | -- https://github.com/dbt-labs/dbt-adapters/blob/main/dbt-redshift/src/dbt/include/redshift/macros/materializations/table.sql 15 | {%- set identifier = model['alias'] -%} 16 | {%- set target_relation = api.Relation.create( 17 | identifier=identifier, 18 | schema=schema, 19 | database=database, 20 | type='view') -%} 21 | {%- set backup_relation = none -%} 22 | 23 | -- The parameters config is used to pass in the names of the parameters that will be used within the table function. 24 | -- parameters can be a single string value (with or without commas), or a list of strings. 25 | {%- set parameters=config.get('parameters') -%} 26 | 27 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 28 | 29 | -- `BEGIN` happens here: 30 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 31 | 32 | -- Create or replace the function (macro) 33 | -- By using create or replace (and a transaction), we do not need separate old and new versions of the relation.
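    -- A rough usage sketch (hypothetical model and column names, not part of this macro): a model configured with
    --     config(materialized='table_function', parameters=['start_ds', 'end_ds'])
    --     select * from events where ds between start_ds and end_ds
    -- compiles into a table macro, and downstream queries call it with parentheses, e.g.
    --     select * from my_model('2024-01-01', '2024-02-01')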
34 | {% call statement('main') -%} 35 | create or replace function {{ target_relation.render() }}( 36 | {% if not parameters %} 37 | {% elif parameters is string or parameters is number %} 38 | {{ parameters if parameters }} 39 | {% else %} 40 | {{ parameters|join(', ') }} 41 | {% endif %} 42 | ) as table ( 43 | {{ sql }}); 44 | {%- endcall %} 45 | 46 | {{ run_hooks(post_hooks, inside_transaction=True) }} 47 | 48 | {% do persist_docs(target_relation, model) %} 49 | 50 | -- `COMMIT` happens here: 51 | {{ adapter.commit() }} 52 | 53 | {{ run_hooks(post_hooks, inside_transaction=False) }} 54 | 55 | {{ return({'relations': [target_relation]}) }} 56 | 57 | {% endmaterialization %} 58 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/gsheet.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import Literal 5 | 6 | import gspread 7 | import pandas as pd 8 | 9 | from . import BasePlugin 10 | from . import PluginConfig 11 | from ..utils import SourceConfig 12 | 13 | 14 | @dataclass 15 | class GSheetConfig(PluginConfig): 16 | method: Literal["service", "oauth"] 17 | 18 | def client(self): 19 | if self.method == "service": 20 | return gspread.service_account() 21 | else: 22 | return gspread.oauth() 23 | 24 | 25 | class Plugin(BasePlugin): 26 | def initialize(self, config: Dict[str, Any]): 27 | self._config = GSheetConfig.from_dict(config) 28 | self._gc = self._config.client() 29 | 30 | def load(self, source_config: SourceConfig): 31 | doc = None 32 | if "title" in source_config: 33 | doc = self._gc.open(source_config["title"]) 34 | elif "key" in source_config: 35 | doc = self._gc.open_by_key(source_config["key"]) 36 | elif "url" in source_config: 37 | doc = self._gc.open_by_url(source_config["url"]) 38 | else: 39 | raise Exception("Source config did not indicate a method to open a GSheet to read") 40 | 41 | sheet = None 42 | if "worksheet" in source_config: 43 | work_id = source_config["worksheet"] 44 | if isinstance(work_id, int): 45 | sheet = doc.get_worksheet(work_id) 46 | elif isinstance(work_id, str): 47 | sheet = doc.worksheet(work_id) 48 | else: 49 | raise Exception( 50 | f"Could not identify a worksheet in the doc from identifier: {work_id}" 51 | ) 52 | else: 53 | sheet = doc.sheet1 54 | 55 | if "range" in source_config: 56 | range = source_config["range"] 57 | df = pd.DataFrame(sheet.get(range)) 58 | if "headers" in source_config: 59 | headers = source_config["headers"] 60 | if len(headers) == len(df.columns): 61 | df.columns = headers 62 | return df 63 | else: 64 | raise Exception( 65 | f"Number of configured headers ({len(headers)}) does not match number of columns in fetched range ({len(df.columns)})." 66 | ) 67 | else: 68 | df = df.rename(columns=df.iloc[0]).drop(df.index[0]).reset_index(drop=True)  # promote the first row to headers 69 | return df 70 | 71 | else: 72 | return pd.DataFrame(sheet.get_all_records()) 73 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Runs unit tests and functional tests using the latest nightly DuckDB build. 3 | # Any tests that use community extensions are skipped because these are not released nightly. 4 | 5 | # **why?** 6 | # Ensure dbt-duckdb is compatible with the bleeding-edge version of DuckDB.
7 | 8 | # **when?** 9 | # This will run nightly, after DuckDB releases its nightly build. 10 | 11 | name: Tests and Code Checks (DuckDB nightly) 12 | 13 | on: 14 | schedule: 15 | - cron: '0 0 * * *' # every 24 hours, top of the hour 16 | workflow_dispatch: 17 | 18 | permissions: read-all 19 | 20 | defaults: 21 | run: 22 | shell: bash 23 | 24 | jobs: 25 | nightly: 26 | name: nightly test / python ${{ matrix.python-version }} 27 | 28 | runs-on: ubuntu-latest 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | python-version: ['3.10', '3.11', '3.12', '3.13'] 34 | 35 | env: 36 | TOXENV: "nightly" 37 | PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv" 38 | S3_MD_ORG_KEY: ${{ secrets.S3_MD_ORG_KEY }} 39 | S3_MD_ORG_REGION: ${{ secrets.S3_MD_ORG_REGION }} 40 | S3_MD_ORG_SECRET: ${{ secrets.S3_MD_ORG_SECRET }} 41 | 42 | steps: 43 | - name: Check out the repository 44 | uses: actions/checkout@v6 45 | with: 46 | persist-credentials: false 47 | 48 | - name: Set up Python ${{ matrix.python-version }} 49 | uses: actions/setup-python@v6 50 | with: 51 | python-version: ${{ matrix.python-version }} 52 | 53 | - name: Install python dependencies 54 | run: | 55 | python -m pip install tox 56 | python -m pip --version 57 | tox --version 58 | 59 | - name: Run tox 60 | run: tox 61 | 62 | - name: Get current date 63 | if: always() 64 | id: date 65 | run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts 66 | 67 | - uses: actions/upload-artifact@v6 68 | if: always() 69 | with: 70 | name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv 71 | path: unit_results.csv 72 | 73 | notify-failure: 74 | name: Send Slack notification on failure 75 | if: failure() 76 | needs: [nightly] 77 | runs-on: ubuntu-latest 78 | steps: 79 | - name: Send Slack notification 80 | uses: slackapi/slack-github-action@v2.1.1 81 | with: 82 | webhook: ${{ secrets.MOTHERDUCK_CI_NOTIFICATION_WEBHOOK }} 83 | webhook-type: webhook-trigger 84 | payload: | 85 | { 86 | "text": "dbt-duckdb nightly workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" 87 | } 88 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_ducklake.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from dbt.adapters.duckdb.relation import DuckDBRelation 8 | from tests.unit.utils import config_from_parts_or_dicts 9 | 10 | 11 | class TestMotherduckDucklakeDetection(unittest.TestCase): 12 | def setUp(self): 13 | set_from_args(Namespace(STRICT_MODE=True), {}) 14 | 15 | # Use a MotherDuck path to align with plugin context, but we won't actually connect 16 | self.base_profile_cfg = { 17 | "outputs": { 18 | "test": { 19 | "type": "duckdb", 20 | "path": "md:my_db", 21 | } 22 | }, 23 | "target": "test", 24 | } 25 | 26 | project_cfg = { 27 | "name": "X", 28 | "version": "0.1", 29 | "profile": "test", 30 | "project-root": "/tmp/dbt/does-not-exist", 31 | "quoting": { 32 | "identifier": False, 33 | "schema": True, 34 | }, 35 | "config-version": 2, 36 | } 37 | 38 | self.project_cfg = project_cfg 39 | self.mock_mp_context = mock.MagicMock() 40 | 41 | def _get_adapter(self, profile_cfg): 42 | config = config_from_parts_or_dicts(self.project_cfg, profile_cfg, cli_vars={}) 
43 | return DuckDBAdapter(config, self.mock_mp_context) 44 | 45 | 46 | def test_is_ducklake_primary_database(self): 47 | profile_cfg = self.base_profile_cfg.copy() 48 | profile_cfg["outputs"]["test"]["is_ducklake"] = True 49 | 50 | adapter = self._get_adapter(profile_cfg) 51 | relation = DuckDBRelation.create(database="my_db", schema="main", identifier="t2") 52 | 53 | assert adapter.is_ducklake(relation) is True 54 | 55 | 56 | def test_is_not_ducklake(self): 57 | profile_cfg = self.base_profile_cfg.copy() 58 | adapter = self._get_adapter(profile_cfg) 59 | relation = DuckDBRelation.create(database="my_db", schema="main", identifier="t2") 60 | assert adapter.is_ducklake(relation) is False 61 | 62 | 63 | def test_is_ducklake_in_attachment(self): 64 | profile_cfg = self.base_profile_cfg.copy() 65 | profile_cfg["outputs"]["test"]["attach"] = [ 66 | { 67 | "path": "md:some_db", 68 | "type": "duckdb", 69 | "is_ducklake": True 70 | } 71 | ] 72 | 73 | adapter = self._get_adapter(profile_cfg) 74 | relation = DuckDBRelation.create(database="some_db", schema="main", identifier="t") 75 | 76 | assert adapter.is_ducklake(relation) is True 77 | 78 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import List 5 | from typing import Optional 6 | from typing import Sequence 7 | 8 | from dbt.adapters.base.column import Column 9 | from dbt.adapters.base.relation import BaseRelation 10 | from dbt.adapters.contracts.relation import RelationConfig 11 | # TODO 12 | # from dbt.context.providers import RuntimeConfigObject 13 | 14 | 15 | @dataclass 16 | class SourceConfig: 17 | name: str 18 | identifier: str 19 | schema: str 20 | database: Optional[str] 21 | meta: Dict[str, Any] 22 | tags: List[str] 23 | 24 | def get(self, key, default=None): 25 | return self.meta.get(key, default) 26 | 27 | def __getitem__(self, key): 28 | return self.meta[key] 29 | 30 | def __contains__(self, key): 31 | return key in self.meta 32 | 33 | def table_name(self) -> str: 34 | if self.database: 35 | return ".".join([self.database, self.schema, self.identifier]) 36 | else: 37 | return ".".join([self.schema, self.identifier]) 38 | 39 | def as_dict(self) -> Dict[str, Any]: 40 | base = { 41 | "name": self.name, 42 | "identifier": self.identifier, 43 | "schema": self.schema, 44 | "database": self.database, 45 | "tags": self.tags, 46 | } 47 | base.update(self.meta) 48 | return base 49 | 50 | @classmethod 51 | def create_from_source(cls, source: RelationConfig) -> "SourceConfig": 52 | meta = source.meta.copy() 53 | # Use the config properties as well if they are present 54 | config_properties = source.config.extra if source.config else {} 55 | meta.update(config_properties) 56 | return SourceConfig( 57 | name=source.name, 58 | identifier=source.identifier, 59 | schema=source.schema, 60 | database=source.database, 61 | meta=meta, 62 | tags=source.tags or [], 63 | ) 64 | 65 | 66 | @dataclass 67 | class TargetLocation: 68 | path: str 69 | format: str 70 | 71 | def as_dict(self) -> Dict[str, Any]: 72 | return {"path": self.path, "format": self.format} 73 | 74 | 75 | @dataclass 76 | class TargetConfig: 77 | relation: BaseRelation 78 | column_list: Sequence[Column] 79 | config: Any # TODO 80 | location: Optional[TargetLocation] = None 81 | 82 | def as_dict(self) -> Dict[str, Any]: 83 | base = { 84 | 
"relation": self.relation.to_dict(), 85 | "column_list": [{"column": c.column, "dtype": c.dtype} for c in self.column_list], 86 | "config": self.config, 87 | } 88 | if self.location: 89 | base["location"] = self.location.as_dict() 90 | return base 91 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_rematerialize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.util import run_dbt, relation_from_name 4 | from dbt.adapters.duckdb import DuckDBConnectionManager 5 | 6 | upstream_model_sql = """ 7 | select range from range(3) 8 | """ 9 | 10 | upstream_partition_by_model = """ 11 | {{ config(materialized='external', options={"partition_by": "a"}) }} 12 | select range as a, 'foo' as b from range(5) 13 | """ 14 | 15 | downstream_model_sql = """ 16 | select range * 2 from {{ ref('upstream_model') }} 17 | """ 18 | 19 | other_downstream_model_sql = """ 20 | select range * 5 from {{ ref('upstream_model') }} 21 | """ 22 | 23 | downstream_of_partition_model = """ 24 | select a from {{ ref('upstream_partition_by_model') }} 25 | """ 26 | 27 | 28 | # class must begin with 'Test' 29 | class TestRematerializeDownstreamExternalModel: 30 | """ 31 | External models should load in dependencies when they exist. 32 | 33 | We test that after materializing upstream and downstream models, we can 34 | materialize the downstream model by itself, even if we are using an 35 | in-memory database. 36 | """ 37 | 38 | @pytest.fixture(scope="class") 39 | def dbt_profile_target(self, dbt_profile_target, tmp_path_factory): 40 | extroot = str(tmp_path_factory.getbasetemp() / "rematerialize") 41 | os.mkdir(extroot) 42 | dbt_profile_target["external_root"] = extroot 43 | return dbt_profile_target 44 | 45 | @pytest.fixture(scope="class") 46 | def project_config_update(self): 47 | return { 48 | "name": "base", 49 | "models": {"+materialized": "external"}, 50 | "on-run-start": ["{{ register_upstream_external_models() }}"], 51 | } 52 | 53 | @pytest.fixture(scope="class") 54 | def models(self): 55 | return { 56 | "upstream_model.sql": upstream_model_sql, 57 | "upstream_partition_by_model.sql": upstream_partition_by_model, 58 | "downstream_model.sql": downstream_model_sql, 59 | "other_downstream_model.sql": other_downstream_model_sql, 60 | "downstream_of_partition_model.sql": downstream_of_partition_model, 61 | } 62 | 63 | def test_run(self, project): 64 | run_dbt(["run"]) 65 | 66 | # Force close the :memory: connection 67 | DuckDBConnectionManager.close_all_connections() 68 | run_dbt( 69 | [ 70 | "run", 71 | "--select", 72 | "downstream_model other_downstream_model downstream_of_partition_model", 73 | ] 74 | ) 75 | 76 | # really makes sure we have created the downstream model 77 | relation = relation_from_name(project.adapter, "downstream_of_partition_model") 78 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 79 | assert result[0] == 5 80 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/table.sql: -------------------------------------------------------------------------------- 1 | {% materialization table, adapter="duckdb", supported_languages=['sql', 'python'] %} 2 | 3 | {%- set language = model['language'] -%} 4 | 5 | {%- set existing_relation = load_cached_relation(this) -%} 6 | {%- set target_relation = this.incorporate(type='table') %} 7 | {%- set 
intermediate_relation = make_intermediate_relation(target_relation) -%} 8 | -- the intermediate_relation should not already exist in the database; get_relation 9 | -- will return None in that case. Otherwise, we get a relation that we can drop 10 | -- later, before we try to use this name for the current operation 11 | {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%} 12 | /* 13 | See ../view/view.sql for more information about this relation. 14 | */ 15 | {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%} 16 | {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%} 17 | -- as above, the backup_relation should not already exist 18 | {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} 19 | -- grab the current table's grants config for comparison later on 20 | {% set grant_config = config.get('grants') %} 21 | 22 | -- drop the temp relations if they exist already in the database 23 | {{ drop_relation_if_exists(preexisting_intermediate_relation) }} 24 | {{ drop_relation_if_exists(preexisting_backup_relation) }} 25 | 26 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 27 | 28 | -- `BEGIN` happens here: 29 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 30 | 31 | -- build model 32 | {% call statement('main', language=language) -%} 33 | {{- create_table_as(False, intermediate_relation, compiled_code, language) }} 34 | {%- endcall %} 35 | 36 | -- cleanup 37 | {% if existing_relation is not none %} 38 | {#-- Drop indexes before renaming to avoid dependency errors --#} 39 | {% do drop_indexes_on_relation(existing_relation) %} 40 | {{ adapter.rename_relation(existing_relation, backup_relation) }} 41 | {% endif %} 42 | 43 | {{ adapter.rename_relation(intermediate_relation, target_relation) }} 44 | 45 | {% do create_indexes(target_relation) %} 46 | 47 | {{ run_hooks(post_hooks, inside_transaction=True) }} 48 | 49 | {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} 50 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 51 | 52 | {% do persist_docs(target_relation, model) %} 53 | 54 | -- `COMMIT` happens here 55 | {{ adapter.commit() }} 56 | 57 | -- finally, drop the existing/backup relation after the commit 58 | {{ drop_relation_if_exists(backup_relation) }} 59 | 60 | {{ run_hooks(post_hooks, inside_transaction=False) }} 61 | 62 | {{ return({'relations': [target_relation]}) }} 63 | {% endmaterialization %} 64 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_attach.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import duckdb 5 | import pytest 6 | 7 | from dbt.adapters.duckdb import DuckDBConnectionManager 8 | from dbt.tests.util import run_dbt 9 | 10 | sources_schema_yml = """ 11 | version: 2 12 | sources: 13 | - name: attached_source 14 | database: attach_test 15 | schema: analytics 16 | tables: 17 | - name: attached_table 18 | description: "An attached table" 19 | columns: 20 | - name: id 21 | description: "An id" 22 | tests: 23 | - unique 24 | - not_null 25 | """ 26 | 27 | models_source_model_sql = """ 28 | select * from {{ source('attached_source', 'attached_table') }} 29 | """ 30 | 31 | models_target_model_sql = """ 32 | {{ config(materialized='table', database='attach_test') }} 33 | SELECT * FROM {{ ref('source_model') }} 34 | """ 35
| 36 | 37 | @pytest.mark.skip_profile("memory", "buenavista", "md") 38 | class TestAttachedDatabase: 39 | @pytest.fixture(scope="class") 40 | def attach_test_db(self): 41 | with tempfile.TemporaryDirectory() as temp_dir: 42 | path = os.path.join(temp_dir, "attach_test.duckdb") 43 | db = duckdb.connect(path) 44 | db.execute("CREATE SCHEMA analytics") 45 | db.execute("CREATE TABLE analytics.attached_table AS SELECT 1 as id") 46 | db.close() 47 | yield path 48 | 49 | @pytest.fixture(scope="class") 50 | def profiles_config_update(self, dbt_profile_target, attach_test_db): 51 | return { 52 | "test": { 53 | "outputs": { 54 | "dev": { 55 | "type": "duckdb", 56 | "path": dbt_profile_target.get("path", ":memory:"), 57 | "attach": [{"path": attach_test_db}], 58 | } 59 | }, 60 | "target": "dev", 61 | } 62 | } 63 | 64 | @pytest.fixture(scope="class") 65 | def models(self): 66 | return { 67 | "schema.yml": sources_schema_yml, 68 | "source_model.sql": models_source_model_sql, 69 | "target_model.sql": models_target_model_sql, 70 | } 71 | 72 | def test_attached_databases(self, project, attach_test_db): 73 | results = run_dbt() 74 | assert len(results) == 2 75 | 76 | test_results = run_dbt(["test"]) 77 | assert len(test_results) == 2 78 | 79 | DuckDBConnectionManager.close_all_connections() 80 | 81 | # check that the model is created in the attached db 82 | db = duckdb.connect(attach_test_db) 83 | ret = db.execute("SELECT * FROM target_model").fetchall() 84 | assert ret[0][0] == 1 85 | db.close() 86 | 87 | # check that everything works on a re-run of dbt 88 | rerun_results = run_dbt() 89 | assert len(rerun_results) == 2 90 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/environments/buenavista.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import psycopg2 4 | 5 | from . import Environment 6 | from .. import credentials 7 | from .. 
import utils 8 | from dbt.adapters.contracts.connection import AdapterResponse 9 | from dbt.adapters.contracts.connection import Connection 10 | 11 | 12 | class BVEnvironment(Environment): 13 | @classmethod 14 | def _get_conn(cls, dbname: str, remote: credentials.Remote): 15 | return psycopg2.connect( 16 | dbname=dbname, 17 | user=remote.user, 18 | host=remote.host, 19 | port=remote.port, 20 | password=remote.password, 21 | ) 22 | 23 | def __init__(self, credentials: credentials.DuckDBCredentials): 24 | super().__init__(credentials) 25 | if not self.creds.remote: 26 | raise Exception("BVConnection only works with a remote host") 27 | 28 | def handle(self): 29 | # Extensions/settings need to be configured per cursor 30 | conn = self._get_conn(self.creds.database, self.creds.remote) 31 | cursor = self.initialize_cursor(self.creds, conn.cursor()) 32 | cursor.close() 33 | return conn 34 | 35 | def is_cancelable(cls): 36 | return False 37 | 38 | @classmethod 39 | def cancel(cls, connection: Connection): 40 | pass 41 | 42 | def get_binding_char(self) -> str: 43 | return "%s" 44 | 45 | def submit_python_job(self, handle, parsed_model: dict, compiled_code: str) -> AdapterResponse: 46 | identifier = parsed_model["alias"] 47 | payload = { 48 | "method": "dbt_python_job", 49 | "params": { 50 | "module_name": identifier, 51 | "module_definition": compiled_code, 52 | }, 53 | } 54 | # TODO: handle errors here 55 | handle.cursor().execute(json.dumps(payload)) 56 | return AdapterResponse(_message="OK") 57 | 58 | def load_source(self, plugin_name: str, source_config: utils.SourceConfig): 59 | handle = self.handle() 60 | payload = { 61 | "method": "dbt_load_source", 62 | "params": { 63 | "plugin_name": plugin_name, 64 | "source_config": source_config.as_dict(), 65 | }, 66 | } 67 | cursor = handle.cursor() 68 | cursor.execute(json.dumps(payload)) 69 | cursor.close() 70 | handle.close() 71 | 72 | def store_relation(self, plugin_name: str, target_config: utils.TargetConfig) -> None: 73 | handle = self.handle() 74 | payload = { 75 | "method": "dbt_store_relation", 76 | "params": { 77 | "plugin_name": plugin_name, 78 | "target_config": target_config.as_dict(), 79 | }, 80 | } 81 | cursor = handle.cursor() 82 | cursor.execute(json.dumps(payload)) 83 | cursor.close() 84 | handle.close() 85 | -------------------------------------------------------------------------------- /tests/unit/test_column.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.adapters.duckdb.column import DuckDBColumn 4 | 5 | # Test cases for is_float method 6 | @pytest.mark.parametrize("dtype, expected", [ 7 | ("real", True), 8 | ("float", True), 9 | ("float4", True), 10 | ("float8", True), 11 | ("double", True), 12 | ("integer", False), 13 | ("string", False), 14 | ("bigint", False) 15 | ]) 16 | def test_is_float(dtype, expected): 17 | column = DuckDBColumn(column="float_test", dtype=dtype) 18 | assert column.is_float() == expected 19 | 20 | # Test cases for is_integer method 21 | @pytest.mark.parametrize("dtype, expected", [ 22 | ("tinyint", True), 23 | ("smallint", True), 24 | ("integer", True), 25 | ("bigint", True), 26 | ("hugeint", True), 27 | ("utinyint", True), 28 | ("usmallint", True), 29 | ("uinteger", True), 30 | ("ubigint", True), 31 | ("int1", True), 32 | ("int2", True), 33 | ("int4", True), 34 | ("int8", True), 35 | ("short", True), 36 | ("int", True), 37 | ("signed", True), 38 | ("long", True), 39 | ("float", False), 40 | ("string", False), 41 | ("double", False) 
42 | ]) 43 | def test_is_integer(dtype, expected): 44 | column = DuckDBColumn(column="integer_test", dtype=dtype) 45 | assert column.is_integer() == expected 46 | 47 | # Test cases for is_struct method 48 | @pytest.mark.parametrize("dtype, expected", [ 49 | ("struct(a integer, b varchar)", True), 50 | ("struct(a integer)", True), 51 | ("STRUCT(a integer, b varchar)", True), 52 | ("integer", False), 53 | ("varchar", False), 54 | ]) 55 | def test_is_struct(dtype, expected): 56 | column = DuckDBColumn(column="struct_test", dtype=dtype) 57 | assert column.is_struct() == expected 58 | 59 | # Test cases for flatten method 60 | def test_flatten_simple_struct(): 61 | column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b varchar)") 62 | flattened = column.flatten() 63 | assert len(flattened) == 2 64 | assert flattened[0].column == "struct_test.a" 65 | assert flattened[0].dtype == "integer" 66 | assert flattened[1].column == "struct_test.b" 67 | assert flattened[1].dtype == "varchar" 68 | 69 | def test_flatten_nested_struct(): 70 | column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b struct(c integer, d varchar))") 71 | flattened = column.flatten() 72 | assert len(flattened) == 3 73 | assert flattened[0].column == "struct_test.a" 74 | assert flattened[0].dtype == "integer" 75 | assert flattened[1].column == "struct_test.b.c" 76 | assert flattened[1].dtype == "integer" 77 | assert flattened[2].column == "struct_test.b.d" 78 | assert flattened[2].dtype == "varchar" 79 | 80 | def test_flatten_non_struct(): 81 | column = DuckDBColumn(column="integer_test", dtype="integer") 82 | flattened = column.flatten() 83 | assert len(flattened) == 1 84 | assert flattened[0].column == "integer_test" 85 | assert flattened[0].dtype == "integer" -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_write_conflict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | from dbt.exceptions import DbtRuntimeError 4 | 5 | 6 | incremental_model_1_sql = """ 7 | {{ config(materialized='incremental') }} 8 | 9 | select 10 | generate_series as id, 11 | 'model_1_data_' || generate_series::varchar as data, 12 | current_timestamp as created_at 13 | from generate_series(1, 100) 14 | 15 | {% if is_incremental() %} 16 | where generate_series > (select coalesce(max(id), 0) from {{ this }}) 17 | {% endif %} 18 | """ 19 | 20 | incremental_model_2_sql = """ 21 | {{ config(materialized='incremental') }} 22 | 23 | select 24 | generate_series as id, 25 | 'model_2_data_' || generate_series::varchar as data, 26 | current_timestamp as created_at 27 | from generate_series(1, 50) 28 | 29 | {% if is_incremental() %} 30 | where generate_series > (select coalesce(max(id), 0) from {{ this }}) 31 | {% endif %} 32 | """ 33 | 34 | 35 | @pytest.mark.skip_profile("buenavista", "file", "memory") 36 | class TestMDWriteConflict: 37 | """Test to reproduce the write-write conflict with multiple models trying to create the dbt_temp schema concurrently.""" 38 | 39 | @pytest.fixture(scope="class") 40 | def profiles_config_update(self, dbt_profile_target): 41 | """Configure with 2 threads to trigger write conflict.""" 42 | return { 43 | "test": { 44 | "outputs": { 45 | "dev": { 46 | "type": "duckdb", 47 | "path": "test_write_conflict.duckdb", 48 | "attach": [ 49 | { 50 | "path": "md:", 51 | } # Attach MotherDuck 52 | ], 53 | "threads": 2, # Enable threading to 
trigger conflict 54 | } 55 | }, 56 | "target": "dev", 57 | } 58 | } 59 | 60 | @pytest.fixture(scope="class") 61 | def models(self): 62 | return { 63 | "incremental_model_1.sql": incremental_model_1_sql, 64 | "incremental_model_2.sql": incremental_model_2_sql, 65 | } 66 | 67 | def test_write_conflict_on_second_run(self, project): 68 | """ 69 | Test that reproduces the write-write conflict: 70 | 1. First run always succeeds (initializes both incremental models) 71 | 2. Second run, which is the first true incremental run, should succeed, 72 | while it previously failed with a write-write conflict due to 73 | both models trying to create the dbt_temp schema simultaneously. 74 | """ 75 | results = run_dbt(expect_pass=True) 76 | 77 | res1 = project.run_sql("SELECT count(*) FROM incremental_model_1", fetch="one") 78 | assert res1[0] == 100 79 | 80 | res2 = project.run_sql("SELECT count(*) FROM incremental_model_2", fetch="one") 81 | assert res2[0] == 50 82 | 83 | run_dbt(expect_pass=True) 84 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | "build": { 4 | "dockerfile": "Dockerfile", 5 | "context": "..", 6 | "args": { 7 | // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 8 | // Append -bullseye or -buster to pin to an OS version. 9 | // Use -bullseye variants on local on arm64/Apple Silicon. 10 | "VARIANT": "3.11", 11 | // Options 12 | "NODE_VERSION": "none" 13 | } 14 | }, 15 | // Configure tool-specific properties. 16 | "customizations": { 17 | // Configure properties specific to VS Code. 18 | "vscode": { 19 | // Set *default* container specific settings.json values on container create. 20 | "settings": { 21 | "python.defaultInterpreterPath": "/usr/local/bin/python", 22 | "python.testing.pytestEnabled": true, 23 | "python.testing.unittestEnabled": false, 24 | "python.linting.enabled": true, 25 | "python.linting.flake8Enabled": true, 26 | "python.linting.mypyEnabled": true, 27 | "python.linting.pylintEnabled": false, 28 | "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", 29 | "python.formatting.provider": "black", 30 | "python.formatting.blackPath": "/usr/local/py-utils/bin/black", 31 | "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", 32 | "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", 33 | "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", 34 | "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", 35 | "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", 36 | "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", 37 | "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint", 38 | "[python]": { 39 | "editor.defaultFormatter": "ms-python.python", 40 | "editor.formatOnSave": true, 41 | "editor.tabSize": 4, 42 | "editor.codeActionsOnSave": { 43 | "source.organizeImports": true 44 | } 45 | } 46 | }, 47 | // Add the IDs of extensions you want installed when the container is created. 
48 | "extensions": [ 49 | "ms-python.python", 50 | "ms-python.vscode-pylance" 51 | ] 52 | } 53 | }, 54 | // "features": { 55 | // // Allow the devcontainer to run host docker commands, see https://github.com/devcontainers/templates/tree/main/src/docker-outside-of-docker 56 | // "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { 57 | // "enableNonRootDocker": true 58 | // } 59 | // }, 60 | // "mounts": [ 61 | // "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" 62 | // ], 63 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 64 | // "forwardPorts": [], 65 | // Use 'postCreateCommand' to run commands after the container is created. 66 | // "postCreateCommand": "pip3 install --user -r requirements.txt", 67 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 68 | "remoteUser": "vscode", 69 | "workspaceFolder": "/workspaces/dbt-duckdb", 70 | "postCreateCommand": "pip install -e . && pip install -r dev-requirements.txt" 71 | } 72 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_basic.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations 4 | from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests 5 | from dbt.tests.adapter.basic.test_singular_tests_ephemeral import ( 6 | BaseSingularTestsEphemeral, 7 | ) 8 | from dbt.tests.adapter.basic.test_empty import BaseEmpty 9 | from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral 10 | from dbt.tests.adapter.basic.test_incremental import BaseIncremental 11 | from dbt.tests.adapter.basic.test_incremental import BaseIncrementalNotSchemaChange 12 | from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests 13 | from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols 14 | from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp 15 | from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod 16 | from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection 17 | from dbt.tests.adapter.basic.test_docs_generate import ( 18 | BaseDocsGenerate, 19 | BaseDocsGenReferences, 20 | ) 21 | from dbt.tests.adapter.basic.expected_catalog import ( 22 | base_expected_catalog, 23 | no_stats, 24 | expected_references_catalog, 25 | ) 26 | 27 | 28 | class TestSimpleMaterializationsDuckDB(BaseSimpleMaterializations): 29 | pass 30 | 31 | 32 | class TestSingularTestsDuckDB(BaseSingularTests): 33 | pass 34 | 35 | 36 | class TestSingularTestsEphemeralDuckDB(BaseSingularTestsEphemeral): 37 | pass 38 | 39 | 40 | class TestEmptyDuckDB(BaseEmpty): 41 | pass 42 | 43 | 44 | class TestEphemeralDuckDB(BaseEphemeral): 45 | pass 46 | 47 | 48 | class TestIncrementalDuckDB(BaseIncremental): 49 | pass 50 | 51 | class TestBaseIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): 52 | pass 53 | 54 | 55 | class TestGenericTestsDuckDB(BaseGenericTests): 56 | pass 57 | 58 | 59 | class TestSnapshotCheckColsDuckDB(BaseSnapshotCheckCols): 60 | pass 61 | 62 | 63 | class TestSnapshotTimestampDuckDB(BaseSnapshotTimestamp): 64 | pass 65 | 66 | 67 | class TestBaseAdapterMethodDuckDB(BaseAdapterMethod): 68 | pass 69 | 70 | 71 | class TestValidateConnectionDuckDB(BaseValidateConnection): 72 | pass 73 | 74 | 75 | class TestDocsGenerateDuckDB(BaseDocsGenerate): 76 | 
@pytest.fixture(scope="class") 77 | def expected_catalog(self, project): 78 | return base_expected_catalog( 79 | project, 80 | role=None, 81 | id_type="INTEGER", 82 | text_type="VARCHAR", 83 | time_type="TIMESTAMP", 84 | view_type="VIEW", 85 | table_type="BASE TABLE", 86 | model_stats=no_stats(), 87 | ) 88 | 89 | 90 | class TestDocsGenReferencesDuckDB(BaseDocsGenReferences): 91 | @pytest.fixture(scope="class") 92 | def expected_catalog(self, project): 93 | return expected_references_catalog( 94 | project, 95 | role=None, 96 | id_type="INTEGER", 97 | text_type="VARCHAR", 98 | time_type="TIMESTAMP", 99 | view_type="VIEW", 100 | table_type="BASE TABLE", 101 | model_stats=no_stats(), 102 | bigint_type="BIGINT", 103 | ) 104 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_constraints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.constraints.test_constraints import ( 4 | BaseTableConstraintsColumnsEqual, 5 | BaseViewConstraintsColumnsEqual, 6 | BaseIncrementalConstraintsColumnsEqual, 7 | BaseConstraintsRuntimeDdlEnforcement, 8 | BaseConstraintsRollback, 9 | BaseIncrementalConstraintsRuntimeDdlEnforcement, 10 | BaseIncrementalConstraintsRollback, 11 | BaseModelConstraintsRuntimeEnforcement, 12 | ) 13 | 14 | 15 | class DuckDBColumnEqualSetup: 16 | @pytest.fixture 17 | def int_type(self): 18 | return "INT" 19 | 20 | @pytest.fixture 21 | def string_type(self): 22 | return "VARCHAR" 23 | 24 | @pytest.fixture 25 | def data_types(self, schema_int_type, int_type, string_type): 26 | # sql_column_value, schema_data_type, error_data_type 27 | return [ 28 | ["1", schema_int_type, int_type], 29 | ["'1'", string_type, string_type], 30 | ["true", "bool", "BOOL"], 31 | ["'2013-11-03 00:00:00-07'::timestamp", "TIMESTAMP", "TIMESTAMP"], 32 | ["'2013-11-03 00:00:00-07'::timestamptz", "TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE"], 33 | ["ARRAY['a','b','c']", "VARCHAR[]", "VARCHAR[]"], 34 | ["ARRAY[1,2,3]", "INTEGER[]", "INTEGER[]"], 35 | ["'1'::numeric", "numeric", "DECIMAL"], 36 | [ 37 | """'{"bar": "baz", "balance": 7.77, "active": false}'::json""", 38 | "json", 39 | "JSON", 40 | ], 41 | ] 42 | 43 | 44 | class TestTableConstraintsColumnsEqual( 45 | DuckDBColumnEqualSetup, BaseTableConstraintsColumnsEqual 46 | ): 47 | pass 48 | 49 | 50 | class TestViewConstraintsColumnsEqual( 51 | DuckDBColumnEqualSetup, BaseViewConstraintsColumnsEqual 52 | ): 53 | pass 54 | 55 | 56 | class TestIncrementalConstraintsColumnsEqual( 57 | DuckDBColumnEqualSetup, BaseIncrementalConstraintsColumnsEqual 58 | ): 59 | pass 60 | 61 | 62 | @pytest.mark.skip_profile("md") 63 | class TestTableConstraintsRuntimeDdlEnforcement( 64 | DuckDBColumnEqualSetup, BaseConstraintsRuntimeDdlEnforcement 65 | ): 66 | pass 67 | 68 | 69 | @pytest.mark.skip_profile("md", "buenavista") 70 | class TestTableConstraintsRollback(DuckDBColumnEqualSetup, BaseConstraintsRollback): 71 | @pytest.fixture(scope="class") 72 | def expected_error_messages(self): 73 | return ["NOT NULL constraint failed"] 74 | 75 | 76 | @pytest.mark.skip_profile("md") 77 | class TestIncrementalConstraintsRuntimeDdlEnforcement( 78 | DuckDBColumnEqualSetup, BaseIncrementalConstraintsRuntimeDdlEnforcement 79 | ): 80 | @pytest.fixture(scope="class") 81 | def expected_error_messages(self): 82 | return ["NOT NULL constraint failed"] 83 | 84 | 85 | @pytest.mark.skip_profile("md", "buenavista") 86 | class TestIncrementalConstraintsRollback( 87 | 
DuckDBColumnEqualSetup, BaseIncrementalConstraintsRollback 88 | ): 89 | @pytest.fixture(scope="class") 90 | def expected_error_messages(self): 91 | return ["NOT NULL constraint failed"] 92 | 93 | 94 | @pytest.mark.skip_profile("md") 95 | class TestModelConstraintsRuntimeEnforcement( 96 | DuckDBColumnEqualSetup, BaseModelConstraintsRuntimeEnforcement 97 | ): 98 | @pytest.fixture(scope="class") 99 | def expected_error_messages(self): 100 | return ["NOT NULL constraint failed"] 101 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/secrets.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import List 5 | from typing import Optional 6 | from typing import Union 7 | 8 | from dbt_common.dataclass_schema import dbtClassMixin 9 | 10 | 11 | DEFAULT_SECRET_PREFIX = "_dbt_secret_" 12 | 13 | 14 | @dataclass 15 | class Secret(dbtClassMixin): 16 | type: str 17 | persistent: Optional[bool] = False 18 | name: Optional[str] = None 19 | provider: Optional[str] = None 20 | scope: Optional[Union[str, List[str]]] = None 21 | secret_kwargs: Optional[Dict[str, Any]] = None 22 | 23 | @classmethod 24 | def create( 25 | cls, 26 | secret_type: str, 27 | persistent: Optional[bool] = None, 28 | name: Optional[str] = None, 29 | provider: Optional[str] = None, 30 | scope: Optional[Union[str, List[str]]] = None, 31 | **kwargs, 32 | ): 33 | # Create and return Secret 34 | return cls( 35 | type=secret_type, 36 | persistent=persistent, 37 | name=name, 38 | provider=provider, 39 | scope=scope, 40 | secret_kwargs=kwargs, 41 | ) 42 | 43 | def _format_value(self, key: str, value: Any) -> str: 44 | """Format a value for DuckDB SQL based on its type and key.""" 45 | # Keys that should not be quoted 46 | unquoted_keys = ["type", "provider", "extra_http_headers"] 47 | 48 | if isinstance(value, dict): 49 | # Format as DuckDB map: map {'key1': 'value1', 'key2': 'value2'} 50 | items = [f"'{k}': '{v}'" for k, v in value.items()] 51 | return f"{key} map {{{', '.join(items)}}}" 52 | elif isinstance(value, list): 53 | # Format as DuckDB array: array ['item1', 'item2'] 54 | items = [f"'{item}'" for item in value] 55 | return f"{key} array [{', '.join(items)}]" 56 | elif key in unquoted_keys: 57 | return f"{key} {value}" 58 | else: 59 | return f"{key} '{value}'" 60 | 61 | def to_sql(self) -> str: 62 | name = f" {self.name}" if self.name else "" 63 | or_replace = " OR REPLACE" if name else "" 64 | persistent = " PERSISTENT" if self.persistent is True else "" 65 | tab = " " 66 | params = self.to_dict(omit_none=True) 67 | params.update(params.pop("secret_kwargs", {})) 68 | 69 | scope_value: Optional[List[str]] = None 70 | raw_scope = params.get("scope") 71 | if isinstance(raw_scope, str): 72 | scope_value = [raw_scope] 73 | elif isinstance(raw_scope, list): 74 | scope_value = raw_scope 75 | 76 | if scope_value is not None: 77 | params.pop("scope", None) 78 | params_sql: List[str] = [] 79 | for key, value in params.items(): 80 | if value is not None and key not in ["name", "persistent"]: 81 | params_sql.append(self._format_value(key, value)) 82 | for s in scope_value: 83 | params_sql.append(f"scope '{s}'") 84 | 85 | params_sql_str = f",\n{tab}".join(params_sql) 86 | else: 87 | params_sql_list = [ 88 | self._format_value(key, value) 89 | for key, value in params.items() 90 | if value is not None and key not in ["name", "persistent"] 91 | ] 92 | 
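        # Illustrative sketch (not part of the adapter), using hypothetical values:
        #   Secret.create("s3", name="my_secret", key_id="abc", secret="xyz").to_sql()
        # would render roughly as
        #   CREATE OR REPLACE SECRET my_secret (
        #       type s3,
        #       key_id 'abc',
        #       secret 'xyz'
        #   )
        # i.e. `type` stays unquoted while the extra keyword arguments are single-quoted.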
params_sql_str = f",\n{tab}".join(params_sql_list) 93 | 94 | sql = f"""CREATE{or_replace}{persistent} SECRET{name} (\n{tab}{params_sql_str}\n)""" 95 | return sql 96 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/motherduck.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from urllib.parse import parse_qs 4 | from urllib.parse import urlparse 5 | 6 | from duckdb import DuckDBPyConnection 7 | 8 | from . import BasePlugin 9 | from dbt.adapters.duckdb.__version__ import version as __plugin_version__ 10 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 11 | from dbt.version import __version__ 12 | 13 | CUSTOM_USER_AGENT = "custom_user_agent" 14 | MOTHERDUCK_EXT = "motherduck" 15 | # MotherDuck config options, in order in which they need to be set 16 | # (SaaS mode is last because it locks other config options) 17 | MOTHERDUCK_CONFIG_OPTIONS = [ 18 | "motherduck_token", 19 | "motherduck_attach_mode", 20 | "motherduck_saas_mode", 21 | ] 22 | 23 | 24 | class Plugin(BasePlugin): 25 | def initialize(self, plugin_config: Dict[str, Any]): 26 | self._config = plugin_config 27 | 28 | @staticmethod 29 | def get_config_from_path(path): 30 | return {key: value[0] for key, value in parse_qs(urlparse(path).query).items()} 31 | 32 | @staticmethod 33 | def get_md_config_settings(config): 34 | # Get MotherDuck config settings 35 | md_config = {} 36 | for name in MOTHERDUCK_CONFIG_OPTIONS: 37 | for key in [ 38 | name, 39 | name.replace("motherduck_", ""), 40 | name.upper(), 41 | name.replace("motherduck_", "").upper(), 42 | ]: 43 | if key in config: 44 | md_config[name] = config[key] 45 | 46 | # Sort values (SaaS mode should be set last) 47 | return dict( 48 | sorted( 49 | md_config.items(), 50 | key=lambda x: MOTHERDUCK_CONFIG_OPTIONS.index(x[0]), 51 | ) 52 | ) 53 | 54 | def configure_connection(self, conn: DuckDBPyConnection): 55 | conn.load_extension(MOTHERDUCK_EXT) 56 | # If a MotherDuck database is in attachments, 57 | # set config options *before* attaching 58 | if self.creds is not None and self.creds.is_motherduck_attach: 59 | config = {} 60 | 61 | # add config options specified in the path 62 | for attachment in self.creds.motherduck_attach: 63 | config.update(self.get_config_from_path(attachment.path)) 64 | 65 | # add config options specified via plugin config 66 | config.update(self._config) 67 | 68 | # add config options specified via settings 69 | if self.creds.settings is not None: 70 | config.update(self.creds.settings) 71 | 72 | # set MD config options and remove from settings 73 | for key, value in self.get_md_config_settings(config).items(): 74 | conn.execute(f"SET {key} = '{value}'") 75 | if self.creds.settings is not None and key in self.creds.settings: 76 | self.creds.settings.pop(key) 77 | 78 | def update_connection_config(self, creds: DuckDBCredentials, config: Dict[str, Any]): 79 | user_agent = f"dbt/{__version__} dbt-duckdb/{__plugin_version__}" 80 | settings: Dict[str, Any] = creds.settings or {} 81 | custom_user_agent = config.get(CUSTOM_USER_AGENT) or settings.pop(CUSTOM_USER_AGENT, None) 82 | if custom_user_agent: 83 | user_agent = f"{user_agent} {custom_user_agent}" 84 | config[CUSTOM_USER_AGENT] = user_agent 85 | 86 | # If a user specified MotherDuck config options via the plugin config, 87 | # pass it to the config kwarg in duckdb.connect. 
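        # Illustrative sketch with hypothetical values: a plugin config such as
        #   {"saas_mode": True, "token": "abc"}
        # is normalized by get_md_config_settings() into
        #   {"motherduck_token": "abc", "motherduck_saas_mode": True}
        # with SaaS mode ordered last, since enabling it locks the other options.
        # These settings are only merged into the duckdb.connect(config=...) kwargs
        # when MotherDuck is not attached; the attach case applies them via the
        # SET statements in configure_connection above.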
88 | if not creds.is_motherduck_attach: 89 | config.update(self.get_md_config_settings(self._config)) 90 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+' 7 | 8 | jobs: 9 | build: 10 | name: Build distribution 📦 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v6 15 | - name: Set up Python 16 | uses: actions/setup-python@v6 17 | with: 18 | python-version: "3.x" 19 | - name: Install pypa/build 20 | run: >- 21 | python3 -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: python3 -m build 27 | - name: Store the distribution packages 28 | uses: actions/upload-artifact@v6 29 | with: 30 | name: python-package-distributions 31 | path: dist/ 32 | 33 | publish-to-pypi: 34 | name: >- 35 | Publish Python 🐍 distribution 📦 to PyPI 36 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 37 | needs: 38 | - build 39 | runs-on: ubuntu-latest 40 | environment: 41 | name: pypi 42 | url: https://pypi.org/p/dbt-duckdb 43 | permissions: 44 | id-token: write # IMPORTANT: mandatory for trusted publishing 45 | 46 | steps: 47 | - name: Download all the dists 48 | uses: actions/download-artifact@v7 49 | with: 50 | name: python-package-distributions 51 | path: dist/ 52 | - name: Publish distribution 📦 to PyPI 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | 55 | github-release: 56 | name: >- 57 | Sign the Python 🐍 distribution 📦 with Sigstore 58 | and upload them to GitHub Release 59 | needs: 60 | - publish-to-pypi 61 | runs-on: ubuntu-latest 62 | 63 | permissions: 64 | contents: write # IMPORTANT: mandatory for making GitHub Releases 65 | id-token: write # IMPORTANT: mandatory for sigstore 66 | 67 | steps: 68 | - name: Download all the dists 69 | uses: actions/download-artifact@v7 70 | with: 71 | name: python-package-distributions 72 | path: dist/ 73 | - name: Sign the dists with Sigstore 74 | uses: sigstore/gh-action-sigstore-python@v3.2.0 75 | with: 76 | inputs: >- 77 | ./dist/*.tar.gz 78 | ./dist/*.whl 79 | - name: Create GitHub Release 80 | env: 81 | GITHUB_TOKEN: ${{ github.token }} 82 | run: >- 83 | gh release create 84 | '${{ github.ref_name }}' 85 | --repo '${{ github.repository }}' 86 | --title '${{ github.ref_name }}' 87 | --generate-notes 88 | - name: Upload artifact signatures to GitHub Release 89 | env: 90 | GITHUB_TOKEN: ${{ github.token }} 91 | # Upload to GitHub Release using the `gh` CLI. 92 | # `dist/` contains the built packages, and the 93 | # sigstore-produced signatures and certificates. 
94 | run: >- 95 | gh release upload 96 | '${{ github.ref_name }}' dist/** 97 | --repo '${{ github.repository }}' 98 | 99 | notify-failure: 100 | name: Send Slack notification on failure 101 | if: failure() 102 | needs: [build, publish-to-pypi, github-release] 103 | runs-on: ubuntu-latest 104 | steps: 105 | - name: Send Slack notification 106 | uses: slackapi/slack-github-action@v2.1.1 107 | with: 108 | webhook: ${{ secrets.MOTHERDUCK_CI_NOTIFICATION_WEBHOOK }} 109 | webhook-type: webhook-trigger 110 | payload: | 111 | { 112 | "text": "dbt-duckdb release workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" 113 | } 114 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/column.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from dataclasses import field 4 | from typing import List 5 | 6 | from dbt.adapters.base.column import Column 7 | 8 | 9 | @dataclass 10 | class DuckDBColumn(Column): 11 | fields: List["DuckDBColumn"] = field(default_factory=list) 12 | 13 | def __post_init__(self): 14 | if self.is_struct(): 15 | self._parse_struct_fields() 16 | 17 | def _parse_struct_fields(self): 18 | # In DuckDB, structs are defined as STRUCT(key1 type1, key2 type2, ...) 19 | # We need to extract the key-type pairs from the struct definition 20 | # e.g., STRUCT(a VARCHAR, b INTEGER) -> ["a VARCHAR", "b INTEGER"] 21 | # We can't just split by comma, because types can contain commas 22 | # e.g. DECIMAL(10, 2) 23 | # The following logic will handle nested structs and complex types 24 | match = re.match(r"STRUCT\((.*)\)", self.dtype, re.IGNORECASE) 25 | if not match: 26 | return 27 | 28 | content = match.group(1) 29 | 30 | fields = [] 31 | paren_level = 0 32 | current_field = "" 33 | for char in content: 34 | if char == "(": 35 | paren_level += 1 36 | elif char == ")": 37 | paren_level -= 1 38 | 39 | if char == "," and paren_level == 0: 40 | fields.append(current_field.strip()) 41 | current_field = "" 42 | else: 43 | current_field += char 44 | fields.append(current_field.strip()) 45 | 46 | for f in fields: 47 | # Split on the first space to separate the name from the type 48 | parts = f.split(" ", 1) 49 | col_name = parts[0] 50 | col_type = parts[1] 51 | self.fields.append(DuckDBColumn(column=col_name, dtype=col_type)) 52 | 53 | def is_float(self): 54 | return self.dtype.lower() in { 55 | # floats 56 | "real", 57 | "float", 58 | "float4", 59 | "float8", 60 | "double", 61 | } 62 | 63 | def is_integer(self) -> bool: 64 | return self.dtype.lower() in { 65 | # signed types 66 | "tinyint", 67 | "smallint", 68 | "integer", 69 | "bigint", 70 | "hugeint", 71 | # unsigned types 72 | "utinyint", 73 | "usmallint", 74 | "uinteger", 75 | "ubigint", 76 | # aliases 77 | "int1", 78 | "int2", 79 | "int4", 80 | "int8", 81 | "short", 82 | "int", 83 | "signed", 84 | "long", 85 | } 86 | 87 | def is_struct(self) -> bool: 88 | return self.dtype.lower().startswith("struct") 89 | 90 | def flatten(self) -> List["DuckDBColumn"]: 91 | if not self.is_struct(): 92 | return [self] 93 | 94 | flat_columns: List["DuckDBColumn"] = [] 95 | for column_field in self.fields: 96 | if column_field.is_struct(): 97 | # Recursively flatten nested structs 98 | for nested_field in column_field.flatten(): 99 | flat_columns.append( 100 | DuckDBColumn( 101 | column=f"{self.column}.{nested_field.column}", 102 | dtype=nested_field.dtype, 103 | ) 104 | ) 105 | else: 
106 | flat_columns.append( 107 | DuckDBColumn( 108 | column=f"{self.column}.{column_field.column}", 109 | dtype=column_field.dtype, 110 | ) 111 | ) 112 | return flat_columns 113 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_delta.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | import pandas as pd 4 | import tempfile 5 | 6 | from dbt.tests.util import ( 7 | run_dbt, 8 | ) 9 | from deltalake.writer import write_deltalake 10 | 11 | delta_schema_yml = """ 12 | version: 2 13 | sources: 14 | - name: delta_source 15 | meta: 16 | plugin: delta 17 | tables: 18 | - name: table_1 19 | description: "An delta table" 20 | meta: 21 | delta_table_path: "{test_delta_path1}" 22 | 23 | - name: delta_source_test 24 | schema: test 25 | meta: 26 | plugin: delta 27 | tables: 28 | - name: table_2 29 | description: "An delta table" 30 | meta: 31 | delta_table_path: "{test_delta_path2}" 32 | as_of_version: 0 33 | """ 34 | 35 | 36 | delta1_sql = """ 37 | {{ config(materialized='table') }} 38 | select * from {{ source('delta_source', 'table_1') }} 39 | """ 40 | delta2_sql = """ 41 | {{ config(materialized='table') }} 42 | select * from {{ source('delta_source', 'table_1') }} limit 1 43 | """ 44 | delta3_sql = """ 45 | {{ config(materialized='table') }} 46 | select * as a from {{ source('delta_source_test', 'table_2') }} WHERE y = 'd' 47 | """ 48 | 49 | delta3_sql_expected = """ 50 | select 1 as x, 'a' as y 51 | """ 52 | 53 | 54 | @pytest.mark.skip_profile("buenavista", "md") 55 | class TestPlugins: 56 | @pytest.fixture(scope="class") 57 | def delta_test_table1(self): 58 | td = tempfile.TemporaryDirectory() 59 | path = Path(td.name) 60 | table_path = path / "test_delta_table1" 61 | 62 | df = pd.DataFrame({"x": [1, 2, 3]}) 63 | write_deltalake(table_path, df, mode="overwrite") 64 | 65 | yield table_path 66 | 67 | td.cleanup() 68 | 69 | @pytest.fixture(scope="class") 70 | def delta_test_table2(self): 71 | td = tempfile.TemporaryDirectory() 72 | path = Path(td.name) 73 | table_path = path / "test_delta_table2" 74 | 75 | df = pd.DataFrame({ 76 | "x": [1], 77 | "y": ["a"] 78 | }) 79 | write_deltalake(table_path, df, mode="overwrite") 80 | 81 | df = pd.DataFrame({ 82 | "x": [1, 2], 83 | "y": ["a","b"] 84 | }) 85 | write_deltalake(table_path, df, mode="overwrite") 86 | 87 | yield table_path 88 | 89 | td.cleanup() 90 | 91 | @pytest.fixture(scope="class") 92 | def profiles_config_update(self, dbt_profile_target): 93 | plugins = [{"module": "delta"}] 94 | return { 95 | "test": { 96 | "outputs": { 97 | "dev": { 98 | "type": "duckdb", 99 | "path": dbt_profile_target.get("path", ":memory:"), 100 | "plugins": plugins, 101 | } 102 | }, 103 | "target": "dev", 104 | } 105 | } 106 | 107 | @pytest.fixture(scope="class") 108 | def models(self, delta_test_table1,delta_test_table2): 109 | return { 110 | "source_schema.yml": delta_schema_yml.format( 111 | test_delta_path1=delta_test_table1, 112 | test_delta_path2=delta_test_table2 113 | ), 114 | "delta_table1.sql": delta1_sql, 115 | "delta_table2.sql": delta2_sql, 116 | "delta_table3.sql": delta3_sql, 117 | "delta_table3_expected.sql": delta3_sql_expected, 118 | } 119 | 120 | def test_plugins(self, project): 121 | results = run_dbt() 122 | assert len(results) == 4 123 | 124 | # check_relations_equal( 125 | # project.adapter, 126 | # [ 127 | # "delta_table3", 128 | # "delta_table3_expected", 129 | # ], 130 | # ) 131 | # res = 
project.run_sql("SELECT count(1) FROM 'delta_table3'", fetch="one") 132 | # assert res[0] == 2 133 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/relation.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from string import Template 3 | from typing import Any 4 | from typing import Optional 5 | from typing import Type 6 | 7 | from .connections import DuckDBConnectionManager 8 | from .utils import SourceConfig 9 | from dbt.adapters.base.relation import BaseRelation 10 | from dbt.adapters.contracts.relation import HasQuoting 11 | from dbt.adapters.contracts.relation import RelationConfig 12 | 13 | 14 | @dataclass(frozen=True, eq=False, repr=False) 15 | class DuckDBRelation(BaseRelation): 16 | require_alias: bool = False 17 | external: Optional[str] = None 18 | 19 | @classmethod 20 | def create_from( 21 | cls: Type["DuckDBRelation"], 22 | quoting: HasQuoting, 23 | relation_config: RelationConfig, 24 | **kwargs: Any, 25 | ) -> "DuckDBRelation": 26 | if relation_config.resource_type == "source": 27 | return cls.create_from_source(quoting, relation_config, **kwargs) 28 | else: 29 | return super().create_from(quoting, relation_config, **kwargs) 30 | 31 | @classmethod 32 | def create_from_source( 33 | cls: Type["DuckDBRelation"], quoting: HasQuoting, source: RelationConfig, **kwargs: Any 34 | ) -> "DuckDBRelation": 35 | """ 36 | This method creates a new DuckDBRelation instance from a source definition. 37 | It first checks if a 'plugin' is defined in the meta argument for the source or its parent configuration. 38 | If a 'plugin' is defined, it uses the environment associated with this run to get the name of the source that we should reference in the compiled model. 39 | If an 'external_location' is defined, it formats the location based on the 'formatter' defined in the source configuration. 40 | If the 'formatter' is not recognized, it raises a ValueError. 41 | Finally, it calls the parent class's create_from_source method to create the DuckDBRelation instance. 42 | 43 | :param cls: The class that this method is a part of. 44 | :param source: The source definition to create the DuckDBRelation from. 45 | :param kwargs: Additional keyword arguments. 46 | :return: A new DuckDBRelation instance. 47 | """ 48 | source_config = SourceConfig.create_from_source(source) 49 | # First check to see if a 'plugin' is defined in the meta argument for 50 | # the source or its parent configuration, and if it is, use the environment 51 | # associated with this run to get the name of the source that we should 52 | # reference in the compiled model 53 | if "plugin" in source_config: 54 | plugin_name = source_config["plugin"] 55 | if DuckDBConnectionManager._ENV is not None: 56 | # No connection means we are probably in the dbt parsing phase, so don't load yet. 
57 | DuckDBConnectionManager.env().load_source(plugin_name, source_config) 58 | elif "external_location" in source_config: 59 | ext_location_template = source_config["external_location"] 60 | formatter = source_config.get("formatter", "newstyle") 61 | if formatter == "newstyle": 62 | ext_location = ext_location_template.format_map(source_config.as_dict()) 63 | elif formatter == "oldstyle": 64 | ext_location = ext_location_template % source_config.as_dict() 65 | elif formatter == "template": 66 | ext_location = Template(ext_location_template).substitute(source_config.as_dict()) 67 | else: 68 | raise ValueError( 69 | f"Formatter {formatter} not recognized. Must be one of 'newstyle', 'oldstyle', or 'template'." 70 | ) 71 | 72 | # If it's a function call or already has single quotes, don't add them 73 | if "(" not in ext_location and not ext_location.startswith("'"): 74 | ext_location = f"'{ext_location}'" 75 | kwargs["external"] = ext_location 76 | 77 | return super().create_from(quoting, source, **kwargs) # type: ignore 78 | 79 | def render(self) -> str: 80 | if self.external: 81 | return self.external 82 | else: 83 | return super().render() 84 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_plugins.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import sqlite3 4 | 5 | from dbt.tests.util import ( 6 | check_relations_equal, 7 | run_dbt, 8 | ) 9 | 10 | sqlalchemy_schema_yml = """ 11 | version: 2 12 | sources: 13 | - name: sql_source 14 | schema: main 15 | config: 16 | plugin: sql 17 | save_mode: ignore 18 | tables: 19 | - name: tt1 20 | description: "My first SQLAlchemy table" 21 | config: 22 | query: "SELECT * FROM {identifier} WHERE id=:id" 23 | params: 24 | id: 1 25 | - name: tt2 26 | config: 27 | table: "test_table2" 28 | """ 29 | 30 | 31 | sqlalchemy1_sql = """ 32 | select * from {{ source('sql_source', 'tt1') }} 33 | """ 34 | sqlalchemy2_sql = """ 35 | {{ config(materialized='external', plugin='sql') }} 36 | select * from {{ source('sql_source', 'tt2') }} 37 | """ 38 | plugin_sql = """ 39 | {{ config(materialized='external', plugin='cfp', key='value') }} 40 | select foo() as foo 41 | """ 42 | 43 | 44 | @pytest.mark.skip_profile("buenavista", "md") 45 | class TestPlugins: 46 | @pytest.fixture(scope="class") 47 | def sqlite_test_db(self): 48 | path = "/tmp/satest.db" 49 | db = sqlite3.connect(path) 50 | cursor = db.cursor() 51 | cursor.execute("CREATE TABLE tt1 (id int, name text)") 52 | cursor.execute("INSERT INTO tt1 VALUES (1, 'John Doe')") 53 | cursor.execute("INSERT INTO tt1 VALUES (2, 'Jane Smith')") 54 | cursor.execute("CREATE TABLE test_table2 (a int, b int, c int)") 55 | cursor.execute("INSERT INTO test_table2 VALUES (1, 2, 3), (4, 5, 6)") 56 | cursor.close() 57 | db.commit() 58 | db.close() 59 | 60 | yield path 61 | 62 | # verify that the external plugin operation works to write to the db 63 | db = sqlite3.connect(path) 64 | cursor = db.cursor() 65 | res = cursor.execute("SELECT * FROM sqlalchemy2").fetchall() 66 | assert len(res) == 2 67 | assert res[0] == (1, 2, 3) 68 | assert res[1] == (4, 5, 6) 69 | cursor.close() 70 | db.close() 71 | 72 | os.unlink(path) 73 | 74 | @pytest.fixture(scope="class") 75 | def profiles_config_update(self, dbt_profile_target, sqlite_test_db): 76 | sa_config = {"connection_url": f"sqlite:///{sqlite_test_db}"} 77 | plugins = [ 78 | {"module": "sqlalchemy", "alias": "sql", "config": sa_config}, 79 | {"module": 
"tests.create_function_plugin", "alias": "cfp"}, 80 | ] 81 | 82 | return { 83 | "test": { 84 | "outputs": { 85 | "dev": { 86 | "type": "duckdb", 87 | "path": dbt_profile_target.get("path", ":memory:"), 88 | "plugins": plugins, 89 | "retries": {"query_attempts": 2}, 90 | } 91 | }, 92 | "target": "dev", 93 | } 94 | } 95 | 96 | @pytest.fixture(scope="class") 97 | def models(self, test_data_path): 98 | return { 99 | "schema_sqlalchemy.yml": sqlalchemy_schema_yml, 100 | "sqlalchemy1.sql": sqlalchemy1_sql, 101 | "sqlalchemy2.sql": sqlalchemy2_sql, 102 | "foo.sql": plugin_sql, 103 | } 104 | 105 | def test_plugins(self, project): 106 | results = run_dbt() 107 | assert len(results) == 3 108 | 109 | res = project.run_sql("SELECT COUNT(1) FROM tt1", fetch="one") 110 | assert res[0] == 1 111 | check_relations_equal( 112 | project.adapter, 113 | [ 114 | "tt1", 115 | "sqlalchemy1", 116 | ], 117 | ) 118 | 119 | res = project.run_sql("SELECT COUNT(1) FROM tt2", fetch="one") 120 | assert res[0] == 2 121 | check_relations_equal( 122 | project.adapter, 123 | [ 124 | "tt2", 125 | "sqlalchemy2", 126 | ], 127 | ) 128 | 129 | res = project.run_sql("SELECT foo FROM foo", fetch="one") 130 | assert res[0] == 1729 131 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_ephemeral.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import pytest 5 | from dbt.tests.adapter.ephemeral.test_ephemeral import ( 6 | BaseEphemeral, 7 | BaseEphemeralMulti, 8 | ephemeral_errors__base__base_copy_sql, 9 | ephemeral_errors__base__base_sql, 10 | ephemeral_errors__dependent_sql, 11 | models_n__ephemeral_level_two_sql, 12 | models_n__ephemeral_sql, 13 | models_n__root_view_sql, 14 | models_n__source_table_sql, 15 | ) 16 | from dbt.tests.util import check_relations_equal, run_dbt 17 | 18 | 19 | class TestEphemeralMulti(BaseEphemeralMulti): 20 | def test_ephemeral_multi(self, project): 21 | db = project.database 22 | 23 | run_dbt(["seed"]) 24 | results = run_dbt(["run"]) 25 | assert len(results) == 3 26 | 27 | check_relations_equal(project.adapter, ["seed", "dependent"]) 28 | check_relations_equal(project.adapter, ["seed", "double_dependent"]) 29 | check_relations_equal(project.adapter, ["seed", "super_dependent"]) 30 | assert os.path.exists("./target/run/test/models/double_dependent.sql") 31 | with open("./target/run/test/models/double_dependent.sql", "r") as fp: 32 | sql_file = fp.read() 33 | 34 | sql_file = re.sub(r"\d+", "", sql_file) 35 | expected_sql = ( 36 | f'create view "{db}"."test_test_ephemeral"."double_dependent__dbt_tmp" as (' 37 | "with __dbt__cte__base as (" 38 | "select * from test_test_ephemeral.seed" 39 | "), __dbt__cte__base_copy as (" 40 | "select * from __dbt__cte__base" 41 | ")-- base_copy just pulls from base. 
Make sure the listed" 42 | "-- graph of CTEs all share the same dbt_cte__base cte" 43 | "select * from __dbt__cte__base where gender = 'Male'" 44 | "union all" 45 | "select * from __dbt__cte__base_copy where gender = 'Female'" 46 | ");" 47 | ) 48 | sql_file = "".join(sql_file.split()) 49 | expected_sql = "".join(expected_sql.split()) 50 | assert sql_file == expected_sql 51 | 52 | 53 | class TestEphemeralNested(BaseEphemeral): 54 | @pytest.fixture(scope="class") 55 | def models(self): 56 | return { 57 | "ephemeral_level_two.sql": models_n__ephemeral_level_two_sql, 58 | "root_view.sql": models_n__root_view_sql, 59 | "ephemeral.sql": models_n__ephemeral_sql, 60 | "source_table.sql": models_n__source_table_sql, 61 | } 62 | 63 | def test_ephemeral_nested(self, project): 64 | db = project.database 65 | 66 | results = run_dbt(["run"]) 67 | assert len(results) == 2 68 | assert os.path.exists("./target/run/test/models/root_view.sql") 69 | with open("./target/run/test/models/root_view.sql", "r") as fp: 70 | sql_file = fp.read() 71 | 72 | sql_file = re.sub(r"\d+", "", sql_file) 73 | expected_sql = ( 74 | f'create view "{db}"."test_test_ephemeral"."root_view__dbt_tmp" as (' 75 | "with __dbt__cte__ephemeral_level_two as (" 76 | f'select * from "{db}"."test_test_ephemeral"."source_table"' 77 | "), __dbt__cte__ephemeral as (" 78 | "select * from __dbt__cte__ephemeral_level_two" 79 | ")select * from __dbt__cte__ephemeral" 80 | ");" 81 | ) 82 | 83 | sql_file = "".join(sql_file.split()) 84 | expected_sql = "".join(expected_sql.split()) 85 | assert sql_file == expected_sql 86 | 87 | 88 | class TestEphemeralErrorHandling(BaseEphemeral): 89 | @pytest.fixture(scope="class") 90 | def models(self): 91 | return { 92 | "dependent.sql": ephemeral_errors__dependent_sql, 93 | "base": { 94 | "base.sql": ephemeral_errors__base__base_sql, 95 | "base_copy.sql": ephemeral_errors__base__base_copy_sql, 96 | }, 97 | } 98 | 99 | def test_ephemeral_error_handling(self, project): 100 | results = run_dbt(["run"], expect_pass=False) 101 | assert len(results) == 1 102 | assert results[0].status == "skipped" 103 | assert "Compilation Error" in results[0].message 104 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_macros.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test that the generate database name macro is case insensitive 3 | 4 | See DuckDB docs: https://duckdb.org/docs/sql/dialect/keywords_and_identifiers.html 5 | 6 | "Identifiers in DuckDB are always case-insensitive, similarly to PostgreSQL. 7 | However, unlike PostgreSQL (and some other major SQL implementations), DuckDB also 8 | treats quoted identifiers as case-insensitive." 
9 | """ 10 | from urllib.parse import urlparse 11 | import pytest 12 | 13 | from dbt.tests.util import ( 14 | run_dbt, 15 | check_result_nodes_by_name 16 | ) 17 | from tests.functional.plugins.motherduck.fixtures import ( 18 | models__gen_data_macro, 19 | macros__generate_database_name, 20 | macros__generate_schema_name, 21 | seeds__example_seed_csv, 22 | ) 23 | 24 | 25 | @pytest.mark.skip_profile("buenavista", "file", "memory") 26 | class TestMacrosGenerateDatabaseName: 27 | @pytest.fixture(scope="class") 28 | def database_name(self, dbt_profile_target, request): 29 | return urlparse(dbt_profile_target["path"]).path + "_ducky_ducky" 30 | 31 | @pytest.fixture(autouse=True) 32 | def run_dbt_scope(self, project, database_name): 33 | project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}") 34 | yield 35 | project.run_sql(f"DROP DATABASE {database_name}") 36 | 37 | @pytest.fixture(scope="class") 38 | def seeds(self): 39 | return { 40 | "seed.csv": seeds__example_seed_csv, 41 | } 42 | 43 | @pytest.fixture(scope="class") 44 | def models(self): 45 | return { 46 | "model.sql": models__gen_data_macro 47 | } 48 | 49 | @pytest.fixture(scope="class") 50 | def macros(self): 51 | return { 52 | "db_name.sql": macros__generate_database_name, 53 | "schema_name.sql": macros__generate_schema_name 54 | } 55 | 56 | @staticmethod 57 | def gen_project_config_update(build_env, org_prefix): 58 | return { 59 | "config-version": 2, 60 | "vars": { 61 | "test": { 62 | "build_env": build_env, 63 | "org_prefix": org_prefix 64 | }, 65 | }, 66 | "macro-paths": ["macros"], 67 | } 68 | 69 | @pytest.fixture(scope="class") 70 | def project_config_update(self): 71 | return self.gen_project_config_update("ducky", "ducky") 72 | 73 | def test_dbname_macro(self, project): 74 | # seed command 75 | results = run_dbt(["seed"]) 76 | assert len(results) == 1 77 | check_result_nodes_by_name(results, ["seed"]) 78 | 79 | for _ in range(3): 80 | results = run_dbt(["run"]) 81 | assert len(results) == 1 82 | check_result_nodes_by_name(results, ["model"]) 83 | 84 | 85 | @pytest.mark.skip_profile("buenavista", "file", "memory") 86 | class TestMacrosGenerateDatabaseNameUpperCase(TestMacrosGenerateDatabaseName): 87 | @pytest.fixture(scope="class") 88 | def database_name(self, dbt_profile_target, request): 89 | return urlparse(dbt_profile_target["path"]).path + "_ducky_ducky" 90 | 91 | @pytest.fixture(scope="class") 92 | def project_config_update(self): 93 | return self.gen_project_config_update("DUCKY", "DUCKY") 94 | 95 | 96 | @pytest.mark.skip_profile("buenavista", "file", "memory") 97 | class TestMacrosGenerateDatabaseNameLowerCase(TestMacrosGenerateDatabaseName): 98 | @pytest.fixture(scope="class") 99 | def database_name(self, dbt_profile_target, request): 100 | return urlparse(dbt_profile_target["path"]).path + "_DUCKY_DUCKY" 101 | 102 | @pytest.fixture(scope="class") 103 | def project_config_update(self): 104 | return self.gen_project_config_update("ducky", "ducky") 105 | 106 | 107 | @pytest.mark.skip_profile("buenavista", "file", "memory") 108 | class TestMacrosGenerateDatabaseNameAllMixedCase(TestMacrosGenerateDatabaseName): 109 | @pytest.fixture(scope="class") 110 | def database_name(self, dbt_profile_target, request): 111 | return urlparse(dbt_profile_target["path"]).path + "_dUcKy_DUckY" 112 | 113 | @pytest.fixture(scope="class") 114 | def project_config_update(self): 115 | return self.gen_project_config_update("DuCkY", "dUcKy") 116 | -------------------------------------------------------------------------------- 
/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import resource 3 | import subprocess 4 | import time 5 | from importlib import metadata 6 | 7 | import duckdb 8 | import pytest 9 | 10 | # Increase the number of open files allowed 11 | # Hack for https://github.com/dbt-labs/dbt-core/issues/7316 12 | soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE) 13 | resource.setrlimit(resource.RLIMIT_NOFILE, (hard_limit, hard_limit)) 14 | 15 | # Import the standard functional fixtures as a plugin 16 | # Note: fixtures with session scope need to be local 17 | pytest_plugins = ["dbt.tests.fixtures.project"] 18 | 19 | MOTHERDUCK_TOKEN = "MOTHERDUCK_TOKEN" 20 | TEST_MOTHERDUCK_TOKEN = "TEST_MOTHERDUCK_TOKEN" 21 | 22 | 23 | def pytest_addoption(parser): 24 | parser.addoption("--profile", action="store", default="memory", type=str) 25 | 26 | 27 | def pytest_report_header() -> list[str]: 28 | """Return a list of strings to be displayed in the header of the report.""" 29 | return [ 30 | f"duckdb: {metadata.version('duckdb')}", 31 | f"dbt-core: {metadata.version('dbt-core')}", 32 | ] 33 | 34 | 35 | @pytest.fixture(scope="session") 36 | def profile_type(request): 37 | return request.config.getoption("--profile") 38 | 39 | 40 | @pytest.fixture(scope="session") 41 | def bv_server_process(profile_type): 42 | if profile_type == "buenavista": 43 | server_process = subprocess.Popen(["python3", "-m", "tests.bv_test_server"]) 44 | 45 | # Wait for the server to be ready 46 | time.sleep(5) 47 | 48 | # Pass the server process to the tests 49 | yield server_process 50 | 51 | # Teardown: Stop the server process after tests are done 52 | server_process.terminate() 53 | server_process.wait() 54 | else: 55 | yield None 56 | 57 | 58 | # The profile dictionary, used to write out profiles.yml 59 | # dbt will supply a unique schema per test, so we do not specify 'schema' here 60 | @pytest.fixture(scope="session") 61 | def dbt_profile_target(profile_type, bv_server_process, tmpdir_factory): 62 | profile = {"type": "duckdb", "threads": 4} 63 | 64 | if profile_type == "buenavista": 65 | profile["database"] = "memory" 66 | profile["remote"] = { 67 | "host": "127.0.0.1", 68 | "port": 5433, 69 | "user": "test", 70 | } 71 | elif profile_type == "file": 72 | profile["path"] = str(tmpdir_factory.mktemp("dbs") / "tmp.db") 73 | elif profile_type == "md": 74 | # Test against MotherDuck 75 | if MOTHERDUCK_TOKEN not in os.environ and MOTHERDUCK_TOKEN.lower() not in os.environ: 76 | if TEST_MOTHERDUCK_TOKEN not in os.environ: 77 | raise ValueError( 78 | f"Please set the {MOTHERDUCK_TOKEN} or {TEST_MOTHERDUCK_TOKEN} \ 79 | environment variable to run tests against MotherDuck" 80 | ) 81 | profile["token"] = os.environ.get(TEST_MOTHERDUCK_TOKEN) 82 | else: 83 | profile["token"] = os.environ.get(MOTHERDUCK_TOKEN, os.environ.get(MOTHERDUCK_TOKEN.lower())) 84 | profile["disable_transactions"] = True 85 | profile["path"] = "md:test" 86 | elif profile_type in ["memory", "nightly"]: 87 | pass # use the default path-less profile 88 | else: 89 | raise ValueError(f"Invalid profile type '{profile_type}'") 90 | 91 | return profile 92 | 93 | 94 | @pytest.fixture(autouse=True, scope="class") 95 | def skip_by_profile_type(profile_type, request): 96 | if request.node.get_closest_marker("skip_profile"): 97 | for skip_profile_type in request.node.get_closest_marker("skip_profile").args: 98 | if skip_profile_type == profile_type: 99 | pytest.skip(f"skipped on '{profile_type}' 
profile") 100 | 101 | 102 | @pytest.fixture(scope="session") 103 | def test_data_path(): 104 | test_dir = os.path.dirname(os.path.abspath(__file__)) 105 | return os.path.join(test_dir, "data") 106 | 107 | 108 | def pytest_collection_modifyitems(config, items): 109 | # Skip the S3 tests if the secrets are not available 110 | if not ( 111 | os.getenv("S3_MD_ORG_KEY") and os.getenv("S3_MD_ORG_REGION") and os.getenv("S3_MD_ORG_SECRET") 112 | ): 113 | skip_s3 = pytest.mark.skip(reason="need S3 credentials to run this test") 114 | for item in items: 115 | if "with_s3_creds" in item.keywords: 116 | item.add_marker(skip_s3) 117 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.utils.test_any_value import BaseAnyValue 4 | from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend 5 | from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat 6 | from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct 7 | from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr 8 | from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText 9 | from dbt.tests.adapter.utils.test_concat import BaseConcat 10 | from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive 11 | from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc 12 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 13 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 14 | from dbt.tests.adapter.utils.test_date_spine import BaseDateSpine 15 | from dbt.tests.adapter.utils.test_escape_single_quotes import ( 16 | BaseEscapeSingleQuotesQuote, 17 | ) 18 | from dbt.tests.adapter.utils.test_except import BaseExcept 19 | from dbt.tests.adapter.utils.test_generate_series import BaseGenerateSeries 20 | from dbt.tests.adapter.utils.test_get_intervals_between import BaseGetIntervalsBetween 21 | from dbt.tests.adapter.utils.test_get_powers_of_two import BaseGetPowersOfTwo 22 | from dbt.tests.adapter.utils.test_hash import BaseHash 23 | from dbt.tests.adapter.utils.test_intersect import BaseIntersect 24 | from dbt.tests.adapter.utils.test_last_day import BaseLastDay 25 | from dbt.tests.adapter.utils.test_length import BaseLength 26 | from dbt.tests.adapter.utils.test_listagg import BaseListagg 27 | from dbt.tests.adapter.utils.test_position import BasePosition 28 | from dbt.tests.adapter.utils.test_replace import BaseReplace 29 | from dbt.tests.adapter.utils.test_right import BaseRight 30 | from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast 31 | from dbt.tests.adapter.utils.test_split_part import BaseSplitPart 32 | from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral 33 | 34 | 35 | class TestAnyValue(BaseAnyValue): 36 | pass 37 | 38 | 39 | class TestBoolOr(BaseBoolOr): 40 | pass 41 | 42 | 43 | class TestCastBoolToText(BaseCastBoolToText): 44 | pass 45 | 46 | 47 | class TestConcat(BaseConcat): 48 | pass 49 | 50 | 51 | class TestDateAdd(BaseDateAdd): 52 | pass 53 | 54 | 55 | class TestDateDiff(BaseDateDiff): 56 | pass 57 | 58 | 59 | # Skipping this b/c the upstream utils test 60 | # is irritatingly adapter-specific at the moment 61 | @pytest.mark.skip 62 | class TestDateSpine(BaseDateSpine): 63 | pass 64 | 65 | class TestDateTrunc(BaseDateTrunc): 66 | pass 67 | 68 | 69 | class 
TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote): 70 | pass 71 | 72 | 73 | class TestGenerateSeries(BaseGenerateSeries): 74 | pass 75 | 76 | 77 | # Skipping this b/c the upstream utils test 78 | # is irritatingly adapter-specific at the moment 79 | @pytest.mark.skip 80 | class TestGetIntervalsBetween(BaseGetIntervalsBetween): 81 | pass 82 | 83 | 84 | class TestGetPowersOfTwo(BaseGetPowersOfTwo): 85 | pass 86 | 87 | 88 | class TestExcept(BaseExcept): 89 | pass 90 | 91 | 92 | class TestHash(BaseHash): 93 | pass 94 | 95 | 96 | class TestIntersect(BaseIntersect): 97 | pass 98 | 99 | 100 | class TestLastDay(BaseLastDay): 101 | pass 102 | 103 | 104 | class TestLength(BaseLength): 105 | pass 106 | 107 | 108 | # NOTE: list_agg relies on an ORDER BY construct MD does not yet support 109 | @pytest.mark.skip_profile("md") 110 | class TestListagg(BaseListagg): 111 | pass 112 | 113 | 114 | class TestPosition(BasePosition): 115 | pass 116 | 117 | 118 | class TestReplace(BaseReplace): 119 | pass 120 | 121 | 122 | class TestRight(BaseRight): 123 | pass 124 | 125 | 126 | class TestSafeCast(BaseSafeCast): 127 | pass 128 | 129 | 130 | class TestSplitPart(BaseSplitPart): 131 | pass 132 | 133 | 134 | class TestStringLiteral(BaseStringLiteral): 135 | pass 136 | 137 | 138 | class TestArrayAppend(BaseArrayAppend): 139 | pass 140 | 141 | 142 | class TestArrayConcat(BaseArrayConcat): 143 | pass 144 | 145 | 146 | class TestArrayConstruct(BaseArrayConstruct): 147 | pass 148 | 149 | 150 | # Skipping this while we sort out what the right default is 151 | # here: https://github.com/duckdb/duckdb/issues/7934 152 | @pytest.mark.skip 153 | class TestCurrentTimestamp(BaseCurrentTimestampNaive): 154 | pass 155 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_write_options.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.adapter.basic.files import ( 4 | base_table_sql, 5 | model_base, 6 | schema_base_yml, 7 | seeds_base_csv, 8 | ) 9 | from dbt.tests.util import ( 10 | check_relation_types, 11 | check_relations_equal, 12 | check_result_nodes_by_name, 13 | relation_from_name, 14 | run_dbt, 15 | ) 16 | 17 | config_write_csv_delim_options = """ 18 | {{ config(materialized="external", format="csv", options={"delimiter": "|"}) }} 19 | """ 20 | 21 | config_write_codec_options = """ 22 | {{ config(materialized="external", options={"codec": "zstd"}) }} 23 | """ 24 | 25 | config_write_partition_by_id = """ 26 | {{ config(materialized="external", options={"partition_by": "id", "codec": "zstd"}) }} 27 | """ 28 | 29 | config_write_partition_by_id_name = """ 30 | {{ config(materialized="external", options={"partition_by": "id, name"}) }} 31 | """ 32 | 33 | csv_delim_options_sql = config_write_csv_delim_options + model_base 34 | write_codec_options = config_write_codec_options + model_base 35 | config_write_partition_by_id_sql = config_write_partition_by_id + model_base 36 | config_write_partition_by_id_name_sql = config_write_partition_by_id_name + model_base 37 | 38 | 39 | class BaseExternalMaterializations: 40 | 41 | @pytest.fixture(scope="class") 42 | def dbt_profile_target(self, dbt_profile_target, tmp_path_factory): 43 | extroot = str(tmp_path_factory.getbasetemp() / "write_options") 44 | os.mkdir(extroot) 45 | dbt_profile_target["external_root"] = extroot 46 | return dbt_profile_target 47 | 48 | @pytest.fixture(scope="class") 49 | def models(self): 50 | return { 51 | 
"table_model.sql": base_table_sql, 52 | "csv_delim_options.sql": csv_delim_options_sql, 53 | "write_codec_options.sql": write_codec_options, 54 | "config_write_partition_by_id.sql": config_write_partition_by_id_sql, 55 | "config_write_partition_by_id_name.sql": config_write_partition_by_id_name_sql, 56 | "schema.yml": schema_base_yml, 57 | } 58 | 59 | @pytest.fixture(scope="class") 60 | def seeds(self): 61 | return { 62 | "base.csv": seeds_base_csv, 63 | } 64 | 65 | @pytest.fixture(scope="class") 66 | def project_config_update(self): 67 | return { 68 | "name": "base", 69 | } 70 | 71 | def test_base(self, project): 72 | 73 | # seed command 74 | results = run_dbt(["seed"]) 75 | # seed result length 76 | assert len(results) == 1 77 | 78 | # run command 79 | results = run_dbt() 80 | # run result length 81 | assert len(results) == 5 82 | 83 | # names exist in result nodes 84 | check_result_nodes_by_name( 85 | results, 86 | [ 87 | "table_model", 88 | "csv_delim_options", 89 | "write_codec_options", 90 | "config_write_partition_by_id", 91 | "config_write_partition_by_id_name", 92 | ], 93 | ) 94 | 95 | # check relation types 96 | expected = { 97 | "base": "table", 98 | "table_model": "table", 99 | "csv_delim_options": "view", 100 | "write_codec_options": "view", 101 | "config_write_partition_by_id": "view", 102 | "config_write_partition_by_id_name": "view", 103 | } 104 | check_relation_types(project.adapter, expected) 105 | 106 | # base table rowcount 107 | relation = relation_from_name(project.adapter, "base") 108 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 109 | assert result[0] == 10 110 | 111 | # relations_equal 112 | check_relations_equal( 113 | project.adapter, 114 | [ 115 | "base", 116 | "csv_delim_options", 117 | "write_codec_options", 118 | "config_write_partition_by_id", 119 | "config_write_partition_by_id_name", 120 | ], 121 | ) 122 | 123 | # check relations in catalog 124 | catalog = run_dbt(["docs", "generate"]) 125 | assert len(catalog.nodes) == 6 126 | assert len(catalog.sources) == 1 127 | 128 | 129 | class TestExternalMaterializations(BaseExternalMaterializations): 130 | pass 131 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/test_indexes.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import re 3 | from dbt.tests.util import ( 4 | run_dbt, 5 | run_dbt_and_capture, 6 | ) 7 | from tests.functional.adapter.indexes.fixtures import ( 8 | models__incremental_sql, 9 | models__table_sql, 10 | seeds__seed_csv, 11 | snapshots__colors_sql, 12 | ) 13 | 14 | 15 | INDEX_DEFINITION_PATTERN = re.compile(r"\((.*?)\)") 16 | 17 | 18 | class TestIndex: 19 | @pytest.fixture(scope="class") 20 | def models(self): 21 | return { 22 | "table.sql": models__table_sql, 23 | "incremental.sql": models__incremental_sql, 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def seeds(self): 28 | return {"seed.csv": seeds__seed_csv} 29 | 30 | @pytest.fixture(scope="class") 31 | def snapshots(self): 32 | return {"colors.sql": snapshots__colors_sql} 33 | 34 | @pytest.fixture(scope="class") 35 | def project_config_update(self): 36 | return { 37 | "config-version": 2, 38 | "seeds": { 39 | "quote_columns": False, 40 | "indexes": [ 41 | {"columns": ["country_code"], "unique": False}, 42 | {"columns": ["country_code", "country_name"], "unique": True}, 43 | ], 44 | }, 45 | "vars": { 46 | "version": 1, 47 | }, 48 | } 49 | 50 | def test_table(self, 
project, unique_schema): 51 | for _ in range(2): 52 | results = run_dbt(["run", "--models", "table"]) 53 | assert len(results) == 1 54 | 55 | indexes = self.get_indexes("table", project, unique_schema) 56 | expected = [ 57 | {"columns": "column_a", "unique": False}, 58 | {"columns": "column_b", "unique": False}, 59 | {"columns": "column_a, column_b", "unique": False}, 60 | {"columns": "column_b, column_a", "unique": True}, 61 | {"columns": "column_a", "unique": False}, 62 | ] 63 | assert len(indexes) == len(expected) 64 | 65 | def test_incremental(self, project, unique_schema): 66 | for additional_argument in [[], [], ["--full-refresh"]]: 67 | results = run_dbt(["run", "--models", "incremental"] + additional_argument) 68 | assert len(results) == 1 69 | 70 | indexes = self.get_indexes("incremental", project, unique_schema) 71 | expected = [ 72 | {"columns": "column_a", "unique": False}, 73 | {"columns": "column_a, column_b", "unique": True}, 74 | ] 75 | assert len(indexes) == len(expected) 76 | 77 | def test_seed(self, project, unique_schema): 78 | for additional_argument in [[], [], ["--full-refresh"]]: 79 | results = run_dbt(["seed"] + additional_argument) 80 | assert len(results) == 1 81 | 82 | indexes = self.get_indexes("seed", project, unique_schema) 83 | expected = [ 84 | {"columns": "country_code", "unique": False}, 85 | { 86 | "columns": "country_code, country_name", 87 | "unique": True, 88 | }, 89 | ] 90 | assert len(indexes) == len(expected) 91 | 92 | def test_snapshot(self, project, unique_schema): 93 | for version in [1, 2]: 94 | results = run_dbt(["snapshot", "--vars", f"version: {version}"]) 95 | assert len(results) == 1 96 | 97 | indexes = self.get_indexes("colors", project, unique_schema) 98 | expected = [ 99 | {"columns": "id", "unique": False}, 100 | {"columns": "id, color", "unique": True}, 101 | ] 102 | assert len(indexes) == len(expected) 103 | 104 | def get_indexes(self, table_name, project, unique_schema): 105 | sql = f""" 106 | SELECT 107 | sql as index_definition, is_unique 108 | FROM duckdb_indexes() 109 | WHERE 110 | schema_name = '{unique_schema}' 111 | AND 112 | table_name = '{table_name}' 113 | """ 114 | results = project.run_sql(sql, fetch="all") 115 | return [self.parse_index_definition(row[0], row[1]) for row in results] 116 | 117 | def parse_index_definition(self, index_definition, is_unique): 118 | index_definition = index_definition.lower() 119 | m = INDEX_DEFINITION_PATTERN.search(index_definition) 120 | return { 121 | "columns": m.group(1), 122 | "unique": is_unique, 123 | } 124 | 125 | 126 | -------------------------------------------------------------------------------- /tests/unit/utils.py: -------------------------------------------------------------------------------- 1 | """Unit test utility functions. 2 | 3 | Note that all imports should be inside the functions to avoid import/mocking 4 | issues. 5 | """ 6 | import os 7 | from unittest import mock 8 | 9 | from dbt.config.project import PartialProject 10 | 11 | 12 | def normalize(path): 13 | """On windows, neither is enough on its own: 14 | 15 | >>> normcase('C:\\documents/ALL CAPS/subdir\\..') 16 | 'c:\\documents\\all caps\\subdir\\..' 
17 | >>> normpath('C:\\documents/ALL CAPS/subdir\\..') 18 | 'C:\\documents\\ALL CAPS' 19 | >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..')) 20 | 'c:\\documents\\all caps' 21 | """ 22 | return os.path.normcase(os.path.normpath(path)) 23 | 24 | 25 | class Obj: 26 | which = "blah" 27 | single_threaded = False 28 | 29 | 30 | def mock_connection(name): 31 | conn = mock.MagicMock() 32 | conn.name = name 33 | return conn 34 | 35 | 36 | def profile_from_dict(profile, profile_name, cli_vars="{}"): 37 | from dbt.config import Profile 38 | from dbt.config.renderer import ProfileRenderer 39 | from dbt.config.utils import parse_cli_vars 40 | 41 | if not isinstance(cli_vars, dict): 42 | cli_vars = parse_cli_vars(cli_vars) 43 | 44 | renderer = ProfileRenderer(cli_vars) 45 | return Profile.from_raw_profile_info( 46 | profile, 47 | profile_name, 48 | renderer, 49 | ) 50 | 51 | 52 | def project_from_dict(project, profile, packages=None, selectors=None, cli_vars="{}"): 53 | from dbt.config.renderer import DbtProjectYamlRenderer 54 | from dbt.config.utils import parse_cli_vars 55 | 56 | if not isinstance(cli_vars, dict): 57 | cli_vars = parse_cli_vars(cli_vars) 58 | 59 | renderer = DbtProjectYamlRenderer(profile, cli_vars) 60 | 61 | project_root = project.pop("project-root", os.getcwd()) 62 | 63 | partial = PartialProject.from_dicts( 64 | project_root=project_root, 65 | project_dict=project, 66 | packages_dict=packages, 67 | selectors_dict=selectors, 68 | ) 69 | return partial.render(renderer) 70 | 71 | 72 | def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"): 73 | from copy import deepcopy 74 | 75 | from dbt.config import Profile, Project, RuntimeConfig 76 | 77 | if isinstance(project, Project): 78 | profile_name = project.profile_name 79 | else: 80 | profile_name = project.get("profile") 81 | 82 | if not isinstance(profile, Profile): 83 | profile = profile_from_dict( 84 | deepcopy(profile), 85 | profile_name, 86 | cli_vars, 87 | ) 88 | 89 | if not isinstance(project, Project): 90 | project = project_from_dict( 91 | deepcopy(project), 92 | profile, 93 | packages, 94 | selectors, 95 | cli_vars, 96 | ) 97 | 98 | args = Obj() 99 | args.vars = cli_vars 100 | args.profile_dir = "/dev/null" 101 | return RuntimeConfig.from_parts(project=project, profile=profile, args=args) 102 | 103 | 104 | def inject_plugin(plugin): 105 | from dbt.adapters.factory import FACTORY 106 | 107 | key = plugin.adapter.type() 108 | FACTORY.plugins[key] = plugin 109 | 110 | 111 | def inject_adapter(value, plugin): 112 | """Inject the given adapter into the adapter factory, so your hand-crafted 113 | artisanal adapter will be available from get_adapter() as if dbt loaded it. 
114 | """ 115 | inject_plugin(plugin) 116 | from dbt.adapters.factory import FACTORY 117 | 118 | key = value.type() 119 | FACTORY.adapters[key] = value 120 | 121 | 122 | def generate_name_macros(package): 123 | from dbt.contracts.graph.parsed import ParsedMacro 124 | from dbt.node_types import NodeType 125 | 126 | name_sql = {} 127 | for component in ("database", "schema", "alias"): 128 | if component == "alias": 129 | source = "node.name" 130 | else: 131 | source = f"target.{component}" 132 | name = f"generate_{component}_name" 133 | sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}" 134 | name_sql[name] = sql 135 | 136 | all_sql = "\n".join(name_sql.values()) 137 | for name, sql in name_sql.items(): 138 | pm = ParsedMacro( 139 | name=name, 140 | resource_type=NodeType.Macro, 141 | unique_id=f"macro.{package}.{name}", 142 | package_name=package, 143 | original_file_path=normalize("macros/macro.sql"), 144 | root_path="./dbt_modules/root", 145 | path=normalize("macros/macro.sql"), 146 | raw_sql=all_sql, 147 | macro_sql=sql, 148 | ) 149 | yield pm 150 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/postgres.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from typing import List 4 | from typing import Optional 5 | from typing import Tuple 6 | 7 | from duckdb import DuckDBPyConnection 8 | 9 | from . import BasePlugin 10 | from dbt.adapters.events.logging import AdapterLogger 11 | 12 | PG_EXT = "postgres" 13 | 14 | 15 | class Plugin(BasePlugin): 16 | logger = AdapterLogger("DuckDB_PostgresPlugin") 17 | 18 | def __init__(self, name: str, plugin_config: Dict[str, Any]): 19 | """ 20 | Initialize the Plugin with a name and configuration. 21 | """ 22 | super().__init__(name, plugin_config) 23 | self.logger.debug( 24 | "Plugin __init__ called with name: %s and config: %s", name, plugin_config 25 | ) 26 | self.initialize(plugin_config) 27 | 28 | def initialize(self, config: Dict[str, Any]): 29 | """ 30 | Initialize the plugin with the provided configuration. 31 | """ 32 | self.logger.debug("Initializing PostgreSQL plugin with config: %s", config) 33 | 34 | self._dsn: str = config["dsn"] 35 | if not self._dsn: 36 | self.logger.error( 37 | "Initialization failed: 'dsn' is a required argument for the postgres plugin!" 38 | ) 39 | raise ValueError("'dsn' is a required argument for the postgres plugin!") 40 | 41 | self._pg_schema: Optional[str] = config.get("pg_schema") # Can be None 42 | self._duckdb_alias: str = config.get("duckdb_alias", "postgres_db") 43 | self._read_only: bool = config.get("read_only", False) 44 | self._secret: Optional[str] = config.get("secret") 45 | self._attach_options: Dict[str, Any] = config.get( 46 | "attach_options", {} 47 | ) # Additional ATTACH options 48 | self._settings: Dict[str, Any] = config.get( 49 | "settings", {} 50 | ) # Extension settings via SET commands 51 | 52 | self.logger.info( 53 | "PostgreSQL plugin initialized with dsn='%s', pg_schema='%s', " 54 | "duckdb_alias='%s', read_only=%s, secret='%s'", 55 | self._dsn, 56 | self._pg_schema, 57 | self._duckdb_alias, 58 | self._read_only, 59 | self._secret, 60 | ) 61 | 62 | def configure_connection(self, conn: DuckDBPyConnection): 63 | """ 64 | Configure the DuckDB connection to attach the PostgreSQL database. 
65 | """ 66 | self.logger.debug("Configuring DuckDB connection for PostgreSQL plugin.") 67 | 68 | conn.install_extension(PG_EXT) 69 | conn.load_extension(PG_EXT) 70 | self.logger.info("PostgreSQL extension installed and loaded.") 71 | 72 | # Set any extension settings provided 73 | self._set_extension_settings(conn) 74 | 75 | # Build and execute the ATTACH command 76 | attach_stmt = self._build_attach_statement() 77 | self.logger.debug("Executing ATTACH statement: %s", attach_stmt) 78 | try: 79 | conn.execute(attach_stmt) 80 | self.logger.info("Successfully attached PostgreSQL database with DSN: %s", self._dsn) 81 | except Exception as e: 82 | self.logger.error("Failed to attach PostgreSQL database: %s", e) 83 | raise 84 | 85 | def _set_extension_settings(self, conn: DuckDBPyConnection): 86 | """ 87 | Set extension settings via SET commands. 88 | """ 89 | for setting, value in self._settings.items(): 90 | # Quote string values 91 | if isinstance(value, str): 92 | value = f"'{value}'" 93 | elif isinstance(value, bool): 94 | value = "true" if value else "false" 95 | set_stmt = f"SET {setting} = {value};" 96 | self.logger.debug("Setting extension option: %s", set_stmt) 97 | try: 98 | conn.execute(set_stmt) 99 | except Exception as e: 100 | self.logger.error("Failed to set option %s: %s", setting, e) 101 | raise 102 | 103 | def _build_attach_statement(self) -> str: 104 | """ 105 | Build the ATTACH statement for connecting to the PostgreSQL database. 106 | """ 107 | attach_options: List[Tuple[str, Optional[str]]] = [("TYPE", "POSTGRES")] 108 | 109 | if self._pg_schema: 110 | attach_options.append(("SCHEMA", f"'{self._pg_schema}'")) 111 | 112 | if self._secret: 113 | attach_options.append(("SECRET", f"'{self._secret}'")) 114 | 115 | # Additional attach options 116 | for k, v in self._attach_options.items(): 117 | if isinstance(v, bool): 118 | v = "true" if v else "false" 119 | elif isinstance(v, str): 120 | v = f"'{v}'" 121 | attach_options.append((k.upper(), v)) 122 | 123 | if self._read_only: 124 | attach_options.append(("READ_ONLY", None)) # No value assigned 125 | 126 | # Convert options to string 127 | attach_options_str = ", ".join( 128 | f"{k} {v}" if v is not None else k for k, v in attach_options 129 | ) 130 | 131 | attach_stmt = f"ATTACH '{self._dsn}' AS {self._duckdb_alias} ({attach_options_str});" 132 | return attach_stmt 133 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/connections.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import threading 3 | from contextlib import contextmanager 4 | from multiprocessing.context import SpawnContext 5 | from typing import Optional 6 | from typing import Set 7 | from typing import Tuple 8 | from typing import TYPE_CHECKING 9 | 10 | import dbt.exceptions 11 | from . 
import environments 12 | from dbt.adapters.contracts.connection import AdapterRequiredConfig 13 | from dbt.adapters.contracts.connection import AdapterResponse 14 | from dbt.adapters.contracts.connection import Connection 15 | from dbt.adapters.contracts.connection import ConnectionState 16 | from dbt.adapters.events.logging import AdapterLogger 17 | from dbt.adapters.sql import SQLConnectionManager 18 | 19 | logger = AdapterLogger("DuckDB") 20 | 21 | if TYPE_CHECKING: 22 | import agate 23 | 24 | 25 | class DuckDBConnectionManager(SQLConnectionManager): 26 | TYPE = "duckdb" 27 | _LOCK = threading.RLock() 28 | _ENV = None 29 | _LOGGED_MESSAGES: Set[str] = set() 30 | 31 | def __init__(self, config: AdapterRequiredConfig, mp_context: SpawnContext) -> None: 32 | super().__init__(config, mp_context) 33 | self.disable_transactions = config.credentials.disable_transactions # type: ignore 34 | 35 | @classmethod 36 | def env(cls) -> environments.Environment: 37 | with cls._LOCK: 38 | if not cls._ENV: 39 | raise Exception("DuckDBConnectionManager environment requested before creation!") 40 | return cls._ENV 41 | 42 | @classmethod 43 | def open(cls, connection: Connection) -> Connection: 44 | if connection.state == ConnectionState.OPEN: 45 | logger.debug("Connection is already open, skipping open.") 46 | return connection 47 | 48 | credentials = cls.get_credentials(connection.credentials) 49 | with cls._LOCK: 50 | try: 51 | if not cls._ENV or cls._ENV.creds != credentials: 52 | cls._ENV = environments.create(credentials) 53 | connection.handle = cls._ENV.handle() 54 | connection.state = ConnectionState.OPEN 55 | 56 | except RuntimeError as e: 57 | logger.debug("Got an error when attempting to connect to DuckDB: '{}'".format(e)) 58 | connection.handle = None 59 | connection.state = ConnectionState.FAIL 60 | raise dbt.adapters.exceptions.FailedToConnectError(str(e)) 61 | 62 | return connection 63 | 64 | @classmethod 65 | def close(cls, connection: Connection) -> Connection: 66 | # if the connection is in closed or init, there's nothing to do 67 | if connection.state in {ConnectionState.CLOSED, ConnectionState.INIT}: 68 | return connection 69 | 70 | connection = super(SQLConnectionManager, cls).close(connection) 71 | return connection 72 | 73 | @classmethod 74 | def warn_once(cls, msg: str): 75 | """Post a warning message once per dbt execution.""" 76 | with cls._LOCK: 77 | if msg in cls._LOGGED_MESSAGES: 78 | return 79 | cls._LOGGED_MESSAGES.add(msg) 80 | logger.warning(msg) 81 | 82 | def cancel(self, connection: Connection): 83 | if self._ENV is not None: 84 | logger.debug( 85 | "cancelling query on connection {}. 
Details: {}".format( 86 | connection.name, connection 87 | ) 88 | ) 89 | self._ENV.cancel(connection) 90 | logger.debug("query cancelled on connection {}".format(connection.name)) 91 | 92 | @contextmanager 93 | def exception_handler(self, sql: str, connection_name="master"): 94 | try: 95 | yield 96 | except dbt.exceptions.DbtRuntimeError: 97 | raise 98 | except RuntimeError as e: 99 | logger.debug("duckdb error: {}".format(str(e))) 100 | logger.debug("Error running SQL: {}".format(sql)) 101 | # Preserve original RuntimeError with full context instead of swallowing 102 | raise dbt.exceptions.DbtRuntimeError(str(e)) from e 103 | except Exception as exc: 104 | logger.debug("duckdb error: {}".format(str(exc))) 105 | logger.debug("Error running SQL: {}".format(sql)) 106 | logger.debug("Rolling back transaction.") 107 | raise dbt.exceptions.DbtRuntimeError(str(exc)) from exc 108 | 109 | @classmethod 110 | def get_credentials(cls, credentials): 111 | return credentials 112 | 113 | @classmethod 114 | def get_response(cls, cursor) -> AdapterResponse: 115 | # https://github.com/dbt-labs/dbt-spark/issues/142 116 | message = "OK" 117 | return AdapterResponse(_message=message) 118 | 119 | @classmethod 120 | def close_all_connections(cls): 121 | with cls._LOCK: 122 | if cls._ENV is not None: 123 | cls._ENV = None 124 | 125 | def execute( 126 | self, 127 | sql: str, 128 | auto_begin: bool = False, 129 | fetch: bool = False, 130 | limit: Optional[int] = None, 131 | ) -> Tuple[AdapterResponse, "agate.Table"]: 132 | if self.disable_transactions: 133 | auto_begin = False 134 | return super().execute(sql, auto_begin, fetch, limit) 135 | 136 | 137 | atexit.register(DuckDBConnectionManager.close_all_connections) 138 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_attach.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import ( 3 | run_dbt, 4 | ) 5 | 6 | random_logs_sql = """ 7 | {{ config(materialized='table', meta=dict(temp_schema_name='dbt_temp_test')) }} 8 | 9 | select 10 | uuid()::varchar as log_id, 11 | '2023-10-01'::timestamp + interval 1 minute * (random() * 20000)::int as dt , 12 | (random() * 4)::int64 as user_id 13 | from generate_series(1, 10000) g(x) 14 | """ 15 | 16 | summary_of_logs_sql = """ 17 | {{ 18 | config( 19 | materialized='incremental', 20 | meta=dict(temp_schema_name='dbt_temp_test'), 21 | ) 22 | }} 23 | 24 | select dt::date as dt, user_id, count(1) as c 25 | from {{ ref('random_logs_test') }} 26 | 27 | 28 | {% if is_incremental() %} 29 | 30 | -- this filter will only be applied on an incremental run 31 | -- (uses > to include records whose timestamp occurred since the last run of this model) 32 | where dt > '2023-10-08'::timestamp 33 | 34 | {% endif %} 35 | group by all 36 | """ 37 | 38 | python_pyarrow_table_model = """ 39 | import pyarrow as pa 40 | 41 | def model(dbt, con): 42 | return pa.Table.from_pydict({"a": [1,2,3]}) 43 | """ 44 | 45 | @pytest.mark.skip_profile("buenavista", "file", "memory") 46 | class TestMDPluginAttach: 47 | @pytest.fixture(scope="class") 48 | def profiles_config_update(self, dbt_profile_target, test_database_name): 49 | md_config = {"token": dbt_profile_target.get("token")} 50 | plugins = [{"module": "motherduck", "config": md_config}] 51 | return { 52 | "test": { 53 | "outputs": { 54 | "dev": { 55 | "type": "duckdb", 56 | "path": ":memory:", 57 | "plugins": plugins, 58 | "attach": [ 59 | { 60 | 
"path": f"md:{test_database_name}", 61 | "type": "motherduck" 62 | } 63 | ] 64 | } 65 | }, 66 | "target": "dev", 67 | } 68 | } 69 | 70 | @pytest.fixture(scope="class") 71 | def models(self, md_sql): 72 | return { 73 | "md_table.sql": md_sql, 74 | "random_logs_test.sql": random_logs_sql, 75 | "summary_of_logs_test.sql": summary_of_logs_sql, 76 | "python_pyarrow_table_model.py": python_pyarrow_table_model, 77 | } 78 | 79 | @pytest.fixture(scope="class") 80 | def md_sql(self, test_database_name): 81 | # Reads from a MD database in my test account in the cloud 82 | return f""" 83 | select * FROM {test_database_name}.main.plugin_table 84 | """ 85 | 86 | @pytest.fixture(autouse=True) 87 | def run_dbt_scope(self, project, test_database_name): 88 | project.run_sql(f"CREATE OR REPLACE TABLE {test_database_name}.plugin_table (i integer, j string)") 89 | project.run_sql(f"INSERT INTO {test_database_name}.plugin_table (i, j) VALUES (1, 'foo')") 90 | yield 91 | project.run_sql("DROP VIEW IF EXISTS md_table") 92 | project.run_sql("DROP TABLE IF EXISTS random_logs_test") 93 | project.run_sql("DROP TABLE IF EXISTS summary_of_logs_test") 94 | project.run_sql(f"DROP TABLE IF EXISTS {test_database_name}.plugin_table") 95 | project.run_sql("DROP TABLE IF EXISTS python_pyarrow_table_model") 96 | 97 | def test_motherduck(self, project): 98 | run_dbt(expect_pass=True) 99 | 100 | 101 | @pytest.mark.skip_profile("buenavista", "file", "memory") 102 | class TestMDPluginAttachWithSettings(TestMDPluginAttach): 103 | @pytest.fixture(scope="class") 104 | def profiles_config_update(self, dbt_profile_target, test_database_name): 105 | md_setting = {"motherduck_token": dbt_profile_target.get("token")} 106 | return { 107 | "test": { 108 | "outputs": { 109 | "dev": { 110 | "type": "duckdb", 111 | "path": ":memory:", 112 | "attach": [ 113 | { 114 | "path": f"md:{test_database_name}", 115 | "type": "motherduck" 116 | } 117 | ], 118 | "settings": md_setting 119 | } 120 | }, 121 | "target": "dev", 122 | } 123 | } 124 | 125 | 126 | @pytest.mark.skip_profile("buenavista", "file", "memory") 127 | class TestMDPluginAttachWithTokenInPath(TestMDPluginAttach): 128 | @pytest.fixture(scope="class") 129 | def profiles_config_update(self, dbt_profile_target, test_database_name): 130 | token = dbt_profile_target.get("token") 131 | return { 132 | "test": { 133 | "outputs": { 134 | "dev": { 135 | "type": "duckdb", 136 | "path": ":memory:", 137 | "attach": [ 138 | { 139 | "path": f"md:{test_database_name}?motherduck_token={token}&user=1", 140 | "type": "motherduck" 141 | } 142 | ] 143 | } 144 | }, 145 | "target": "dev", 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_table_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test that table functions work as intended. 3 | See README for reasons to use this materialization approach! 
4 | """ 5 | import pytest 6 | 7 | from dbt.tests.util import ( 8 | run_dbt, 9 | check_result_nodes_by_name, 10 | relation_from_name 11 | ) 12 | 13 | seeds__example_seed_csv = """a,b,c 14 | 1,2,3 15 | 4,5,6 16 | 7,8,9 17 | """ 18 | 19 | models__example_table = """ 20 | {{ config(materialized='table') }} 21 | select * from {{ ref("seed") }} 22 | """ 23 | 24 | models__my_table_function = """ 25 | {{ config(materialized='table_function') }} 26 | select * from {{ ref("example_table") }} 27 | """ 28 | 29 | models__use_table_function = """ 30 | {{ config(materialized='table') }} 31 | select * from {{ ref("my_table_function") }}() 32 | """ 33 | 34 | models__my_table_function_1_param = """ 35 | {{ config(materialized='table_function', parameters='where_a') }} 36 | select * from {{ ref("example_table") }} 37 | where a = where_a 38 | """ 39 | 40 | models__use_table_function_1_param = """ 41 | {{ config(materialized='table') }} 42 | select * from {{ ref("my_table_function_1_param") }}(4) 43 | """ 44 | 45 | models__my_table_function_1_param_with_comma = """ 46 | {{ config(materialized='table_function', parameters='where_a, where_b') }} 47 | select * from {{ ref("example_table") }} 48 | where 1=1 49 | and a = where_a 50 | and b = where_b 51 | """ 52 | 53 | models__use_table_function_1_param_with_comma = """ 54 | {{ config(materialized='table') }} 55 | select * from {{ ref("my_table_function_1_param_with_comma") }}(4, 5) 56 | """ 57 | 58 | models__my_table_function_2_params = """ 59 | {{ config(materialized='table_function', parameters=['where_a', 'where_b']) }} 60 | select * from {{ ref("example_table") }} 61 | where 1=1 62 | and a = where_a 63 | and b = where_b 64 | """ 65 | 66 | models__use_table_function_2_params = """ 67 | {{ config(materialized='table') }} 68 | select * from {{ ref("my_table_function_2_params") }}(4, 5) 69 | """ 70 | 71 | # To test that the table function will work smoothly even if a column is added: 72 | # Create an example_table 73 | # create a table_function that is select * from example_table 74 | # Persist the output of that table function to a table 75 | # Alter the table to add a column 76 | # Persist the output of that table function to a new table (should include the new column) 77 | # Note this will not recreate the table_function (which would have been needed with a view) 78 | models__use_table_function_after_adding_column = """ 79 | -- depends_on: {{ ref('use_table_function') }} 80 | {{ config(materialized='table') }} 81 | {% set alter_table_query %} 82 | alter table {{ ref("example_table") }} add column d integer default 42 83 | {% endset %} 84 | 85 | {% set results = run_query(alter_table_query) %} 86 | select * from {{ ref("my_table_function") }}() 87 | """ 88 | 89 | 90 | 91 | @pytest.mark.skip_profile("buenavista") 92 | class TestTableFunction: 93 | 94 | @pytest.fixture(scope="class") 95 | def seeds(self): 96 | return { 97 | "seed.csv": seeds__example_seed_csv, 98 | } 99 | 100 | @pytest.fixture(scope="class") 101 | def models(self): 102 | return { 103 | "example_table.sql": models__example_table, 104 | "my_table_function.sql": models__my_table_function, 105 | "use_table_function.sql": models__use_table_function, 106 | "my_table_function_1_param.sql": models__my_table_function_1_param, 107 | "use_table_function_1_param.sql": models__use_table_function_1_param, 108 | "my_table_function_1_param_with_comma.sql": models__my_table_function_1_param_with_comma, 109 | "use_table_function_1_param_with_comma.sql": models__use_table_function_1_param_with_comma, 110 | 
"my_table_function_2_params.sql": models__my_table_function_2_params, 111 | "use_table_function_2_params.sql": models__use_table_function_2_params, 112 | "use_table_function_after_adding_column.sql": models__use_table_function_after_adding_column, 113 | } 114 | 115 | def test_base(self, project): 116 | # seed command 117 | results = run_dbt(["seed"]) 118 | assert len(results) == 1 119 | check_result_nodes_by_name(results, ["seed"]) 120 | 121 | results = run_dbt(["run"]) 122 | assert len(results) == 10 123 | check_result_nodes_by_name(results, [ 124 | "example_table", 125 | "my_table_function", 126 | "use_table_function", 127 | "my_table_function_1_param", 128 | "use_table_function_1_param", 129 | "my_table_function_1_param_with_comma", 130 | "use_table_function_1_param_with_comma", 131 | "my_table_function_2_params", 132 | "use_table_function_2_params", 133 | "use_table_function_after_adding_column" 134 | ]) 135 | 136 | relation_pre_alter = relation_from_name(project.adapter, "use_table_function") 137 | result_pre_alter = project.run_sql(f"describe {relation_pre_alter}", fetch="all") 138 | column_names_pre_alter = [row[0] for row in result_pre_alter] 139 | assert column_names_pre_alter == ['a', 'b', 'c'] 140 | 141 | relation_post_alter = relation_from_name(project.adapter, "use_table_function_after_adding_column") 142 | result_post_alter = project.run_sql(f"describe {relation_post_alter}", fetch="all") 143 | column_names_post_alter = [row[0] for row in result_post_alter] 144 | assert column_names_post_alter == ['a', 'b', 'c', 'd'] 145 | -------------------------------------------------------------------------------- /tests/unit/test_data_path_quoting.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.adapters.duckdb.credentials import Attachment 3 | 4 | 5 | class TestDataPathQuoting: 6 | """Test that data_path options are properly quoted in SQL generation.""" 7 | 8 | def test_data_path_s3_url_should_be_quoted(self): 9 | """Test that S3 URLs in data_path are properly quoted.""" 10 | attachment = Attachment( 11 | path="/tmp/test.db", 12 | options={"data_path": "s3://my-bucket/path"} 13 | ) 14 | sql = attachment.to_sql() 15 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 's3://my-bucket/path') 16 | assert "DATA_PATH 's3://my-bucket/path'" in sql 17 | 18 | def test_data_path_windows_path_should_be_quoted(self): 19 | """Test that Windows paths in data_path are properly quoted.""" 20 | attachment = Attachment( 21 | path="/tmp/test.db", 22 | options={"data_path": "C:\\Users\\test\\data"} 23 | ) 24 | sql = attachment.to_sql() 25 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 'C:\Users\test\data') 26 | assert "DATA_PATH 'C:\\Users\\test\\data'" in sql 27 | 28 | def test_data_path_unix_path_should_be_quoted(self): 29 | """Test that Unix paths in data_path are properly quoted.""" 30 | attachment = Attachment( 31 | path="/tmp/test.db", 32 | options={"data_path": "/home/user/data"} 33 | ) 34 | sql = attachment.to_sql() 35 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/home/user/data') 36 | assert "DATA_PATH '/home/user/data'" in sql 37 | 38 | def test_data_path_url_with_spaces_should_be_quoted(self): 39 | """Test that paths with spaces are properly quoted.""" 40 | attachment = Attachment( 41 | path="/tmp/test.db", 42 | options={"data_path": "/path/with spaces/data"} 43 | ) 44 | sql = attachment.to_sql() 45 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/path/with spaces/data') 46 | assert "DATA_PATH 
'/path/with spaces/data'" in sql 47 | 48 | def test_numeric_options_should_not_be_quoted(self): 49 | """Test that numeric options are not quoted.""" 50 | attachment = Attachment( 51 | path="/tmp/test.db", 52 | options={"timeout": 30000} 53 | ) 54 | sql = attachment.to_sql() 55 | # Should generate: ATTACH '/tmp/test.db' (TIMEOUT 30000) 56 | assert "TIMEOUT 30000" in sql 57 | assert "TIMEOUT '30000'" not in sql 58 | 59 | def test_boolean_options_work_correctly(self): 60 | """Test that boolean options work as expected.""" 61 | attachment = Attachment( 62 | path="/tmp/test.db", 63 | options={"use_cache": True, "skip_validation": False} 64 | ) 65 | sql = attachment.to_sql() 66 | # True booleans should appear as flag, False booleans should be omitted 67 | assert "USE_CACHE" in sql 68 | assert "SKIP_VALIDATION" not in sql 69 | 70 | def test_multiple_options_with_data_path(self): 71 | """Test multiple options including data_path.""" 72 | attachment = Attachment( 73 | path="/tmp/test.db", 74 | options={ 75 | "data_path": "s3://bucket/path", 76 | "timeout": 5000, 77 | "use_cache": True 78 | } 79 | ) 80 | sql = attachment.to_sql() 81 | assert "DATA_PATH 's3://bucket/path'" in sql 82 | assert "TIMEOUT 5000" in sql 83 | assert "USE_CACHE" in sql 84 | 85 | def test_already_single_quoted_strings_not_double_quoted(self): 86 | """Test that already single-quoted strings are not double-quoted.""" 87 | attachment = Attachment( 88 | path="/tmp/test.db", 89 | options={"data_path": "'s3://my-bucket/path'"} 90 | ) 91 | sql = attachment.to_sql() 92 | # Should keep existing single quotes, not add more 93 | assert "DATA_PATH 's3://my-bucket/path'" in sql 94 | assert "DATA_PATH ''s3://my-bucket/path''" not in sql 95 | 96 | def test_already_double_quoted_strings_preserved(self): 97 | """Test that already double-quoted strings are preserved.""" 98 | attachment = Attachment( 99 | path="/tmp/test.db", 100 | options={"data_path": '"s3://my-bucket/path"'} 101 | ) 102 | sql = attachment.to_sql() 103 | # Should keep existing double quotes 104 | assert 'DATA_PATH "s3://my-bucket/path"' in sql 105 | assert 'DATA_PATH \'"s3://my-bucket/path"\'' not in sql 106 | 107 | def test_quoted_strings_with_whitespace_preserved(self): 108 | """Test that quoted strings with surrounding whitespace are preserved.""" 109 | attachment = Attachment( 110 | path="/tmp/test.db", 111 | options={"data_path": " 's3://my-bucket/path' "} 112 | ) 113 | sql = attachment.to_sql() 114 | # Should detect quotes despite whitespace and preserve original value 115 | assert "DATA_PATH 's3://my-bucket/path' " in sql 116 | assert "DATA_PATH ' 's3://my-bucket/path' '" not in sql 117 | 118 | def test_quoted_strings_with_whitespace_double_quotes(self): 119 | """Test that double quoted strings with surrounding whitespace are preserved.""" 120 | attachment = Attachment( 121 | path="/tmp/test.db", 122 | options={"data_path": ' "s3://my-bucket/path" '} 123 | ) 124 | sql = attachment.to_sql() 125 | # Should detect quotes despite whitespace and preserve original value 126 | assert 'DATA_PATH "s3://my-bucket/path" ' in sql 127 | assert 'DATA_PATH \' "s3://my-bucket/path" \'' not in sql -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/merge_config_validation.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_merge_config(config, target_relation=none) %} 2 | {%- set errors = [] -%} 3 | 4 | {%- set base_configuration_fields = 
{ 5 | 'merge_update_condition': 'string', 6 | 'merge_insert_condition': 'string', 7 | 'merge_on_using_columns': 'sequence', 8 | 'merge_update_columns': 'sequence', 9 | 'merge_update_set_expressions': 'mapping', 10 | 'merge_exclude_columns': 'sequence', 11 | 'merge_returning_columns': 'sequence' 12 | } -%} 13 | 14 | {%- for field_name, field_type in base_configuration_fields.items() -%} 15 | {%- set field_value = config.get(field_name) -%} 16 | {%- if field_type == 'string' -%} 17 | {%- do validate_string_field(field_value, field_name, errors) -%} 18 | {%- elif field_type == 'sequence' -%} 19 | {%- do validate_string_list_field(field_value, field_name, errors) -%} 20 | {%- elif field_type == 'mapping' -%} 21 | {%- do validate_dict_field(field_value, field_name, errors) -%} 22 | {%- endif -%} 23 | {%- endfor -%} 24 | 25 | {%- do validate_ducklake_restrictions(config, target_relation, errors) -%} 26 | 27 | {%- do validate_merge_clauses(config, base_configuration_fields, errors) -%} 28 | 29 | {%- if errors -%} 30 | {{ exceptions.raise_compiler_error("MERGE configuration errors:\n" ~ errors|join('\n')) }} 31 | {%- endif -%} 32 | {% endmacro %} 33 | 34 | 35 | {%- macro validate_merge_clauses(config, base_configuration_fields, errors) -%} 36 | {%- if config.get('merge_clauses') is not none -%} 37 | {%- if config.get('merge_clauses') is not mapping -%} 38 | {%- do errors.append("merge_clauses must be a dictionary, found: " ~ config.get('merge_clauses')) -%} 39 | {%- else -%} 40 | {%- set merge_clauses = config.get('merge_clauses') -%} 41 | {%- set clause_types = ['when_matched', 'when_not_matched'] -%} 42 | 43 | {%- set has_when_matched = 'when_matched' in merge_clauses -%} 44 | {%- set has_when_not_matched = 'when_not_matched' in merge_clauses -%} 45 | 46 | {%- if not has_when_matched and not has_when_not_matched -%} 47 | {%- do errors.append("merge_clauses must contain at least one of 'when_matched' or 'when_not_matched' keys") -%} 48 | {%- endif -%} 49 | 50 | {%- for clause_type in clause_types -%} 51 | {%- if clause_type in merge_clauses -%} 52 | {%- do validate_merge_clause_list(merge_clauses, clause_type, errors) -%} 53 | {%- endif -%} 54 | {%- endfor -%} 55 | 56 | {%- set conflicting_configs = [] -%} 57 | {%- for config_name, config_type in base_configuration_fields.items() -%} 58 | {%- if config_name not in ['merge_on_using_columns', 'merge_returning_columns'] -%} 59 | {%- set config_value = config.get(config_name) -%} 60 | {%- if config_value is not none -%} 61 | {%- if config_type == 'sequence' -%} 62 | {%- if config_value|length > 0 -%} 63 | {%- do conflicting_configs.append(config_name) -%} 64 | {%- endif -%} 65 | {%- elif config_type == 'mapping' -%} 66 | {%- if config_value.keys()|length > 0 -%} 67 | {%- do conflicting_configs.append(config_name) -%} 68 | {%- endif -%} 69 | {%- else -%} 70 | {%- do conflicting_configs.append(config_name) -%} 71 | {%- endif -%} 72 | {%- endif -%} 73 | {%- endif -%} 74 | {%- endfor -%} 75 | 76 | {%- if conflicting_configs|length > 0 -%} 77 | {%- do errors.append("When merge_clauses is specified, the following basic merge configurations will be ignored and should be removed: " ~ conflicting_configs|join(', ') ~ ". 
Define your merge behavior within merge_clauses instead.") -%} 78 | {%- endif -%} 79 | {%- endif -%} 80 | {%- endif -%} 81 | {%- endmacro -%} 82 | 83 | {%- macro validate_merge_clause_list(merge_clauses, clause_type, errors) -%} 84 | {%- if merge_clauses.get(clause_type) is not sequence or merge_clauses.get(clause_type) is mapping or merge_clauses.get(clause_type) is string -%} 85 | {%- do errors.append("merge_clauses." ~ clause_type ~ " must be a list") -%} 86 | {%- elif merge_clauses.get(clause_type)|length == 0 -%} 87 | {%- do errors.append("merge_clauses." ~ clause_type ~ " must contain at least one element") -%} 88 | {%- else -%} 89 | {%- for clause in merge_clauses.get(clause_type) -%} 90 | {%- if clause is not mapping -%} 91 | {%- do errors.append("merge_clauses." ~ clause_type ~ " elements must be dictionaries, found: " ~ clause) -%} 92 | {%- endif -%} 93 | {%- endfor -%} 94 | {%- endif -%} 95 | {%- endmacro -%} 96 | 97 | {%- macro validate_ducklake_restrictions(config, target_relation, errors) -%} 98 | {%- if target_relation and adapter.is_ducklake(target_relation) -%} 99 | {%- set merge_clauses = config.get('merge_clauses', {}) -%} 100 | {%- if merge_clauses and 'when_matched' in merge_clauses -%} 101 | {%- set when_matched_clauses = merge_clauses.get('when_matched', []) -%} 102 | {#- use a namespace: a plain set inside the for-loop below would not update a counter defined outside the loop because of Jinja's block scoping -#} {%- set ns = namespace(update_delete_count=0) -%} 103 | 104 | {%- for clause in when_matched_clauses -%} 105 | {%- if clause is mapping and clause.get('action') in ['update', 'delete'] -%} 106 | {%- set ns.update_delete_count = ns.update_delete_count + 1 -%} 107 | {%- endif -%} 108 | {%- endfor -%} 109 | 110 | {%- if ns.update_delete_count > 1 -%} 111 | {%- do errors.append("DuckLake MERGE restrictions: when_matched clauses can contain only a single UPDATE or DELETE action. Found " ~ ns.update_delete_count ~ " UPDATE/DELETE actions. DuckLake currently supports only one UPDATE or DELETE operation per MERGE statement.") -%} 112 | {%- endif -%} 113 | {%- endif -%} 114 | {%- endif -%} 115 | {%- endmacro -%} 116 | --------------------------------------------------------------------------------
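For orientation, a minimal sketch of a model configuration that the validation macros above would accept — this is not a file from the repository. The overall shape (a merge_clauses mapping whose when_matched / when_not_matched keys hold non-empty lists of clause dictionaries carrying an 'action' key) follows the checks in validate_merge_config and validate_merge_clause_list; the incremental strategy name, the unique_key, the specific actions shown, and the referenced model are illustrative assumptions rather than documented behavior.

{# hypothetical model, e.g. models/customers_merged.sql — shape assumed from the validators above #}
{{
  config(
    materialized='incremental',
    incremental_strategy='merge',
    unique_key='id',
    merge_clauses={
      'when_matched': [
        {'action': 'update'}
      ],
      'when_not_matched': [
        {'action': 'insert'}
      ]
    }
  )
}}
select * from {{ ref('stg_customers') }}

Because merge_clauses is supplied, none of the basic merge options (merge_update_columns, merge_update_condition, and so on) are set here; per validate_merge_clauses, providing both would raise a compiler error listing the conflicting keys.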