├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_retries_connect.py │ ├── test_retries_query.py │ ├── test_get_column_schema.py │ ├── test_external_utils.py │ ├── test_column.py │ ├── utils.py │ └── test_data_path_quoting.py ├── functional │ ├── adapter │ │ ├── indexes │ │ │ ├── __init__.py │ │ │ ├── fixtures.py │ │ │ └── test_indexes.py │ │ ├── test_concurrency.py │ │ ├── test_changing_relation_type.py │ │ ├── store_test_failures_tests │ │ │ └── test_store_test_failures.py │ │ ├── test_empty.py │ │ ├── test_simple_snapshot.py │ │ ├── test_caching.py │ │ ├── aliases │ │ │ └── test_aliases.py │ │ ├── test_persist_docs.py │ │ ├── utils │ │ │ ├── test_date_spine.py │ │ │ └── test_utils.py │ │ ├── test_unit_testing.py │ │ ├── test_community_extensions.py │ │ ├── test_hooks.py │ │ ├── simple_seed │ │ │ └── test_fast_seed.py │ │ ├── test_sources.py │ │ ├── test_rematerialize.py │ │ ├── test_attach.py │ │ ├── test_basic.py │ │ ├── test_constraints.py │ │ ├── test_ephemeral.py │ │ ├── test_write_options.py │ │ └── test_table_function.py │ ├── plugins │ │ ├── motherduck │ │ │ ├── fixtures.py │ │ │ ├── conftest.py │ │ │ ├── test_motherduck_ducklake.py │ │ │ ├── test_motherduck_write_conflict.py │ │ │ ├── test_macros.py │ │ │ └── test_motherduck_attach.py │ │ ├── test_glue.py │ │ ├── test_sqlite.py │ │ ├── test_iceberg.py │ │ ├── test_excel.py │ │ ├── test_gsheet.py │ │ ├── test_delta.py │ │ └── test_plugins.py │ └── fsspec │ │ └── test_filesystems.py ├── data │ └── excel_file.xlsx ├── create_function_plugin.py ├── bv_test_server.py └── conftest.py ├── MANIFEST.in ├── mypy.ini ├── dbt ├── include │ ├── duckdb │ │ ├── __init__.py │ │ ├── dbt_project.yml │ │ ├── macros │ │ │ ├── utils │ │ │ │ ├── any_value.sql │ │ │ │ ├── splitpart.sql │ │ │ │ ├── generate_series.sql │ │ │ │ ├── external_location.sql │ │ │ │ ├── lastday.sql │ │ │ │ ├── datediff.sql │ │ │ │ ├── listagg.sql │ │ │ │ ├── dateadd.sql │ │ │ │ └── upstream.sql │ │ │ ├── materializations │ │ │ │ ├── hooks.sql │ │ │ │ ├── incremental_strategy │ │ │ │ │ ├── merge_defaults.sql │ │ │ │ │ ├── validation_helper.sql │ │ │ │ │ ├── delete_insert.sql │ │ │ │ │ └── merge_config_validation.sql │ │ │ │ ├── table_function.sql │ │ │ │ └── table.sql │ │ │ ├── columns.sql │ │ │ ├── catalog.sql │ │ │ ├── persist_docs.sql │ │ │ ├── snapshot_helper.sql │ │ │ └── seed.sql │ │ └── sample_profiles.yml │ └── __init__.py ├── __init__.py └── adapters │ ├── __init__.py │ └── duckdb │ ├── constants.py │ ├── __version__.py │ ├── __init__.py │ ├── plugins │ ├── pd_utils.py │ ├── iceberg.py │ ├── delta.py │ ├── sqlalchemy.py │ ├── gsheet.py │ ├── motherduck.py │ └── postgres.py │ ├── environments │ ├── motherduck.py │ └── buenavista.py │ ├── utils.py │ ├── secrets.py │ ├── column.py │ ├── relation.py │ └── connections.py ├── setup.py ├── .flake8 ├── .github ├── dependabot.yml └── workflows │ ├── nightly.yml │ └── release.yml ├── pytest.ini ├── scripts └── build-dist.sh ├── dev-requirements.txt ├── .gitignore ├── .pre-commit-config.yaml ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── setup.cfg ├── CHANGELOG.md └── tox.ini /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include dbt/include *.sql *.yml *.md 2 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | mypy_path = ./third-party-stubs 3 | namespace_packages = True 4 | -------------------------------------------------------------------------------- /dbt/include/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /tests/data/excel_file.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/dbt-duckdb/HEAD/tests/data/excel_file.xlsx -------------------------------------------------------------------------------- /dbt/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/duckdb/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: dbt_duckdb 3 | version: 1.0 4 | config-version: 2 5 | 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup 3 | 4 | setup( 5 | setup_requires=["pbr"], 6 | pbr=True, 7 | ) 8 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/any_value.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__any_value(expression) -%} 2 | 3 | arbitrary({{ expression }}) 4 | 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_concurrency.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.concurrency.test_concurrency import TestConcurenncy 2 | 3 | 4 | class TestConcurrencyDuckDB(TestConcurenncy): 5 | pass 6 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/splitpart.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__split_part(string_text, delimiter_text, part_number) %} 2 | 
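{# Illustrative usage (identifier and delimiter assumed, not from the codebase):
   split_part("order_ref", "'-'", 2) renders as string_split(order_ref, '-')[ 2 ];
   DuckDB list indexing is 1-based, so part_number 1 selects the first element of the split. #}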
string_split({{ string_text }}, {{ delimiter_text }})[ {{ part_number }} ] 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = 3 | E 4 | W 5 | F 6 | ignore = 7 | W503 # makes Flake8 work like black 8 | W504 9 | E203 # makes Flake8 work like black 10 | E741 11 | E501 12 | exclude = tests 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/generate_series.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__generate_series(upper_bound) %} 2 | select 3 | generate_series as generated_number 4 | from generate_series(1, {{ upper_bound }}) 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/constants.py: -------------------------------------------------------------------------------- 1 | TEMP_SCHEMA_NAME = "temp_schema_name" 2 | DEFAULT_TEMP_SCHEMA_NAME = "dbt_temp" 3 | DUCKDB_MERGE_LOWEST_VERSION_POSSIBLE = "1.4.0-dev0" 4 | DUCKDB_BASE_INCREMENTAL_STRATEGIES = ["append", "delete+insert"] 5 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_changing_relation_type.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.relations.test_changing_relation_type import BaseChangeRelationTypeValidator 2 | 3 | 4 | class TestChangeRelationTypesDuckDB(BaseChangeRelationTypeValidator): 5 | pass -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /dbt/include/duckdb/sample_profiles.yml: -------------------------------------------------------------------------------- 1 | default: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: dev.duckdb 6 | threads: 1 7 | 8 | prod: 9 | type: duckdb 10 | path: prod.duckdb 11 | threads: 4 12 | 13 | target: dev 14 | -------------------------------------------------------------------------------- /tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import ( 2 | TestStoreTestFailures, 3 | ) 4 | 5 | 6 | class DuckDBTestStoreTestFailures(TestStoreTestFailures): 7 | pass 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:unclosed file .*:ResourceWarning 5 | testpaths = 6 | tests/functional 7 | tests/unit 8 | markers = 9 | skip_profile(profile) 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_empty.py: -------------------------------------------------------------------------------- 1 | 
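# Like most modules under tests/functional/adapter, this file reuses the shared suites from
# dbt-tests-adapter: subclassing a Base* case with a bare `pass` runs the inherited tests
# against the DuckDB target supplied by the test profile (see tests/conftest.py).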
from dbt.tests.adapter.empty.test_empty import BaseTestEmpty, BaseTestEmptyInlineSourceRef 2 | 3 | 4 | class TestDuckDBEmpty(BaseTestEmpty): 5 | pass 6 | 7 | 8 | class TestDuckDBEmptyInlineSourceRef(BaseTestEmptyInlineSourceRef): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_simple_snapshot.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.simple_snapshot.test_snapshot import ( 2 | BaseSnapshotCheck, 3 | BaseSimpleSnapshot, 4 | ) 5 | 6 | 7 | class TestSimpleSnapshotDuckDB(BaseSimpleSnapshot): 8 | pass 9 | 10 | 11 | class TestSnapshotCheckDuckDB(BaseSnapshotCheck): 12 | pass 13 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_caching.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.caching.test_caching import ( 2 | BaseCachingLowercaseModel, 3 | BaseCachingSelectedSchemaOnly, 4 | ) 5 | 6 | 7 | class TestCachingLowerCaseModelDuckDB(BaseCachingLowercaseModel): 8 | pass 9 | 10 | 11 | class TestCachingSelectedSchemaOnlyDuckDB(BaseCachingSelectedSchemaOnly): 12 | pass 13 | -------------------------------------------------------------------------------- /scripts/build-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | DBT_PATH="$( cd "$(dirname "$0")/.." ; pwd -P )" 6 | 7 | PYTHON_BIN=${PYTHON_BIN:-python} 8 | 9 | echo "$PYTHON_BIN" 10 | 11 | set -x 12 | 13 | rm -rf "$DBT_PATH"/dist 14 | rm -rf "$DBT_PATH"/build 15 | mkdir -p "$DBT_PATH"/dist 16 | 17 | cd "$DBT_PATH" 18 | $PYTHON_BIN setup.py sdist bdist_wheel 19 | 20 | set +x 21 | -------------------------------------------------------------------------------- /tests/functional/adapter/aliases/test_aliases.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.aliases.test_aliases import BaseAliases, BaseAliasErrors, BaseSameAliasDifferentSchemas 2 | 3 | class TestAliasesDuckDB(BaseAliases): 4 | pass 5 | 6 | class TestAliasesErrorDuckDB(BaseAliasErrors): 7 | pass 8 | 9 | class BaseSameALiasDifferentSchemasDuckDB(BaseSameAliasDifferentSchemas): 10 | pass 11 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/external_location.sql: -------------------------------------------------------------------------------- 1 | {%- macro external_location(relation, config) -%} 2 | {%- if config.get('options', {}).get('partition_by') is none -%} 3 | {%- set format = config.get('format', 'parquet') -%} 4 | {{- adapter.external_root() }}/{{ relation.identifier }}.{{ format }} 5 | {%- else -%} 6 | {{- adapter.external_root() }}/{{ relation.identifier }} 7 | {%- endif -%} 8 | {%- endmacro -%} 9 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/__version__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version as get_version 2 | 3 | _package_name = "dbt-duckdb" 4 | version = get_version(_package_name) 5 | # This is to get around SemVer 2 (dbt_common) vs Linux/Python compatible SemVer 3 (pbr) conflicting 6 | # See: https://docs.openstack.org/pbr/latest/user/semver.html 7 | _prerelease_tags = ["dev", "a", "b", "c"] 8 | for tag in _prerelease_tags: 9 | version = 
version.replace(f".{tag}", f"-{tag}") 10 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/lastday.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__last_day(date, datepart) -%} 2 | 3 | {%- if datepart == 'quarter' -%} 4 | -- duckdb dateadd does not support quarter interval. 5 | cast( 6 | {{dbt.dateadd('day', '-1', 7 | dbt.dateadd('month', '3', dbt.date_trunc(datepart, date)) 8 | )}} 9 | as date) 10 | {%- else -%} 11 | {{dbt.default_last_day(date, datepart)}} 12 | {%- endif -%} 13 | 14 | {%- endmacro %} 15 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.base import AdapterPlugin 2 | from dbt.adapters.duckdb.connections import DuckDBConnectionManager # noqa 3 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 4 | from dbt.adapters.duckdb.impl import DuckDBAdapter 5 | from dbt.include import duckdb 6 | 7 | Plugin = AdapterPlugin( 8 | adapter=DuckDBAdapter, # type: ignore 9 | credentials=DuckDBCredentials, 10 | include_path=duckdb.PACKAGE_PATH, 11 | ) 12 | -------------------------------------------------------------------------------- /tests/create_function_plugin.py: -------------------------------------------------------------------------------- 1 | from duckdb import DuckDBPyConnection 2 | 3 | from dbt.adapters.duckdb.plugins import BasePlugin 4 | from dbt.adapters.duckdb.utils import TargetConfig 5 | 6 | 7 | def foo() -> int: 8 | return 1729 9 | 10 | 11 | class Plugin(BasePlugin): 12 | def configure_connection(self, conn: DuckDBPyConnection): 13 | conn.create_function("foo", foo) 14 | 15 | def store(self, target_config: TargetConfig): 16 | assert target_config.config.get("key") == "value" 17 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_persist_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.persist_docs.test_persist_docs import ( 4 | BasePersistDocs, 5 | BasePersistDocsColumnMissing, 6 | BasePersistDocsCommentOnQuotedColumn, 7 | ) 8 | 9 | @pytest.mark.skip_profile("md") 10 | class TestPersistDocs(BasePersistDocs): 11 | pass 12 | 13 | 14 | @pytest.mark.skip_profile("md") 15 | class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing): 16 | pass 17 | 18 | 19 | @pytest.mark.skip_profile("md") 20 | class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn): 21 | pass 22 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/pd_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from ..utils import TargetConfig 4 | 5 | 6 | def target_to_df(target_config: TargetConfig) -> pd.DataFrame: 7 | """Load a dataframe from a target config.""" 8 | location = target_config.location 9 | if location is None: 10 | raise Exception("Target config does not have a location") 11 | if location.format == "csv": 12 | return pd.read_csv(location.path) 13 | elif location.format == "parquet": 14 | return pd.read_parquet(location.path) 15 | else: 16 | raise Exception(f"Unsupported format: {location.format}") 17 | -------------------------------------------------------------------------------- 
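A minimal sketch of how a custom plugin can combine BasePlugin with the target_to_df helper above; the module path, the out_dir config key, and the parquet destination are illustrative assumptions rather than part of the package:

# plugins/copy_to_parquet.py (hypothetical user module; it would be referenced from the
# profile's plugins configuration rather than shipped with dbt-duckdb)
from typing import Any
from typing import Dict

from dbt.adapters.duckdb.plugins import BasePlugin
from dbt.adapters.duckdb.plugins import pd_utils
from dbt.adapters.duckdb.utils import TargetConfig


class Plugin(BasePlugin):
    def initialize(self, config: Dict[str, Any]):
        # assumed plugin config key with a local default
        self._out_dir = config.get("out_dir", "/tmp/dbt_copies")

    def store(self, target_config: TargetConfig):
        # read back the csv/parquet file the external materialization wrote
        df = pd_utils.target_to_df(target_config)
        # re-write it as a parquet file named after the relation
        df.to_parquet(f"{self._out_dir}/{target_config.relation.identifier}.parquet", index=False)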
/dbt/include/duckdb/macros/materializations/hooks.sql: -------------------------------------------------------------------------------- 1 | -- this macro overrides the default run_hooks macro from dbt-adapters and drops the extra `commit;` 2 | -- because DuckDB does not begin a txn when a connection is created 3 | {% macro run_hooks(hooks, inside_transaction=True) %} 4 | {% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %} 5 | {% set rendered = render(hook.get('sql')) | trim %} 6 | {% if (rendered | length) > 0 %} 7 | {% call statement(auto_begin=inside_transaction) %} 8 | {{ rendered }} 9 | {% endcall %} 10 | {% endif %} 11 | {% endfor %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__datediff(first_date, second_date, datepart) -%} 2 | {% if datepart == 'week' %} 3 | ({{ datediff(first_date, second_date, 'day') }} // 7 + case 4 | when date_part('dow', ({{first_date}})::timestamp) <= date_part('dow', ({{second_date}})::timestamp) then 5 | case when {{first_date}} <= {{second_date}} then 0 else -1 end 6 | else 7 | case when {{first_date}} <= {{second_date}} then 1 else 0 end 8 | end) 9 | {% else %} 10 | (date_diff('{{ datepart }}', {{ first_date }}::timestamp, {{ second_date}}::timestamp )) 11 | {% endif %} 12 | {%- endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/listagg.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} 2 | {% if limit_num -%} 3 | list_aggr( 4 | (array_agg( 5 | {{ measure }} 6 | {% if order_by_clause -%} 7 | {{ order_by_clause }} 8 | {%- endif %} 9 | ))[1:{{ limit_num }}], 10 | 'string_agg', 11 | {{ delimiter_text }} 12 | ) 13 | {%- else %} 14 | string_agg( 15 | {{ measure }}, 16 | {{ delimiter_text }} 17 | {% if order_by_clause -%} 18 | {{ order_by_clause }} 19 | {%- endif %} 20 | ) 21 | {%- endif %} 22 | {%- endmacro %} 23 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/fixtures.py: -------------------------------------------------------------------------------- 1 | # 2 | # Models 3 | # 4 | 5 | models__gen_data_macro = """ 6 | select * from {{ ref("seed") }} 7 | """ 8 | 9 | # 10 | # Macros 11 | # 12 | 13 | macros__generate_database_name = """ 14 | {% macro generate_database_name(custom_database_name=none, node=none) -%} 15 | {{ target.database | trim }}_{{ var("build_env") | trim }}_{{ var("org_prefix") | trim }} 16 | {%- endmacro %} 17 | """ 18 | 19 | 20 | macros__generate_schema_name = """ 21 | {% macro generate_schema_name(custom_schema_name=none, node=none) -%} 22 | {{ target.schema | trim }}_{{ var("build_env") | trim }}_{{ var("org_prefix") | trim }} 23 | {%- endmacro %} 24 | """ 25 | 26 | # 27 | # Seeds 28 | # 29 | 30 | seeds__example_seed_csv = """a,b,c 31 | 1,2,3 32 | 4,5,6 33 | 7,8,9 34 | """ 35 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/columns.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} 2 | 3 | {% if add_columns %} 4 | {% for column in add_columns %} 5 
| {% set sql -%} 6 | alter {{ relation.type }} {{ relation }} add column 7 | {{ api.Relation.create(identifier=column.name) }} {{ column.data_type }} 8 | {%- endset -%} 9 | {% do run_query(sql) %} 10 | {% endfor %} 11 | {% endif %} 12 | 13 | {% if remove_columns %} 14 | {% for column in remove_columns %} 15 | {% set sql -%} 16 | alter {{ relation.type }} {{ relation }} drop column 17 | {{ api.Relation.create(identifier=column.name) }} 18 | {%- endset -%} 19 | {% do run_query(sql) %} 20 | {% endfor %} 21 | {% endif %} 22 | 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # install latest changes in dbt-core + dbt-tests-adapter 2 | # git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core 3 | # git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter 4 | 5 | dbt-tests-adapter==1.19.5 6 | 7 | boto3 8 | mypy-boto3-glue 9 | pandas 10 | pyarrow==22.0.0 11 | buenavista==0.5.0 12 | bumpversion 13 | flaky 14 | freezegun==1.5.5 15 | fsspec 16 | gspread 17 | ipdb 18 | mypy==1.18.2 19 | openpyxl 20 | pip-tools 21 | pre-commit 22 | psycopg2-binary 23 | psycopg[binary] 24 | pyiceberg 25 | pytest 26 | pytest-dotenv 27 | logbook<1.9 # pytest-logbook still imports logbook.compat 28 | pytest-logbook 29 | pytest-csv 30 | pytest-xdist 31 | pytest-mock 32 | testcontainers[postgres] 33 | pytz 34 | ruff 35 | sqlalchemy 36 | tox>=3.13 37 | twine 38 | wheel 39 | deltalake 40 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__dateadd(datepart, interval, from_date_or_timestamp) %} 2 | 3 | {# 4 | Support both literal and expression intervals (e.g., column references) 5 | by multiplying an INTERVAL by the value. This avoids DuckDB parser issues 6 | with "interval () " and works across versions. 
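For example (column name illustrative), dateadd('day', 3, 'shipped_at') renders as
(shipped_at + cast(3 as bigint) * interval 1 day), and with the unit mapping below
dateadd('quarter', 1, 'shipped_at') becomes
(shipped_at + (cast(1 as bigint) * 3) * interval 1 month).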
7 | 8 | Also map unsupported units: 9 | - quarter => 3 months 10 | - week => 7 days (DuckDB supports WEEK as a literal, but keep it explicit) 11 | #} 12 | 13 | {%- set unit = datepart | lower -%} 14 | {%- if unit == 'quarter' -%} 15 | ({{ from_date_or_timestamp }} + (cast({{ interval }} as bigint) * 3) * interval 1 month) 16 | {%- elif unit == 'week' -%} 17 | ({{ from_date_or_timestamp }} + (cast({{ interval }} as bigint) * 7) * interval 1 day) 18 | {%- else -%} 19 | ({{ from_date_or_timestamp }} + cast({{ interval }} as bigint) * interval 1 {{ unit }}) 20 | {%- endif -%} 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/conftest.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | import pytest 4 | import duckdb 5 | import os 6 | 7 | 8 | @pytest.fixture(scope="session") 9 | def test_database_name(): 10 | """Generate a unique database name for the entire motherduck test session""" 11 | date_str = datetime.now().strftime("%Y%m%d") 12 | random_suffix = uuid.uuid4().hex[:6] 13 | db_name = f"test_db_{date_str}_{random_suffix}" 14 | 15 | # Create the database once for all tests 16 | token = os.environ.get("MOTHERDUCK_TOKEN") or os.environ.get("TEST_MOTHERDUCK_TOKEN") 17 | if token: 18 | conn = duckdb.connect(f"md:?motherduck_token={token}") 19 | conn.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}") 20 | conn.close() 21 | 22 | yield db_name 23 | 24 | # Clean up: drop the database after all tests complete 25 | if token: 26 | conn = duckdb.connect(f"md:?motherduck_token={token}") 27 | conn.execute(f"DROP DATABASE IF EXISTS {db_name}") 28 | conn.close() 29 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/merge_defaults.sql: -------------------------------------------------------------------------------- 1 | {% macro merge_clause_defaults( 2 | merge_update_condition, 3 | merge_insert_condition, 4 | merge_update_columns=[], 5 | merge_exclude_columns=[], 6 | merge_update_set_expressions={} 7 | ) -%} 8 | 9 | {{ return({ 10 | 'when_matched_update_by_name': { 11 | 'action': 'update', 12 | 'condition': merge_update_condition, 13 | 'mode': 'by_name' 14 | }, 15 | 'when_not_matched_insert_by_name': { 16 | 'action': 'insert', 17 | 'condition': merge_insert_condition, 18 | 'mode': 'by_name' 19 | }, 20 | 'when_matched_update_explicit': { 21 | 'action': 'update', 22 | 'condition': merge_update_condition, 23 | 'mode': 'explicit', 24 | 'update': { 25 | 'include': merge_update_columns, 26 | 'exclude': merge_exclude_columns, 27 | 'set_expressions': merge_update_set_expressions 28 | } 29 | } 30 | }) }} 31 | {%- endmacro %} 32 | -------------------------------------------------------------------------------- /tests/functional/fsspec/test_filesystems.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | from dbt.adapters.duckdb.connections import DuckDBConnectionManager 4 | 5 | models_file_model_sql = """ 6 | {{ config(materialized='table') }} 7 | select * 8 | from read_csv_auto('github://data/team_ratings.csv') 9 | WHERE conf = 'West' 10 | """ 11 | 12 | 13 | @pytest.mark.skip_profile("buenavista", "md") 14 | class TestFilesystems: 15 | @pytest.fixture(scope="class") 16 | def dbt_profile_target(self, dbt_profile_target): 17 | return { 18 | "type": 
"duckdb", 19 | "path": dbt_profile_target.get("path", ":memory:"), 20 | "filesystems": [ 21 | {"fs": "github", "org": "jwills", "repo": "nba_monte_carlo"} 22 | ], 23 | } 24 | 25 | @pytest.fixture(scope="class") 26 | def models(self): 27 | return { 28 | "file_model.sql": models_file_model_sql, 29 | } 30 | 31 | def test_filesystems(self, project): 32 | DuckDBConnectionManager.close_all_connections() 33 | results = run_dbt() 34 | assert len(results) == 1 35 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/iceberg.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | import pyiceberg.catalog 5 | 6 | from . import BasePlugin 7 | from ..utils import SourceConfig 8 | 9 | 10 | class Plugin(BasePlugin): 11 | def initialize(self, config: Dict[str, Any]): 12 | if "catalog" not in config: 13 | raise Exception("'catalog' is a required argument for the iceberg plugin!") 14 | catalog = config.pop("catalog") 15 | self._catalog = pyiceberg.catalog.load_catalog(catalog, **config) 16 | 17 | def load(self, source_config: SourceConfig): 18 | table_format = source_config.get("iceberg_table", "{schema}.{identifier}") 19 | table_name = table_format.format(**source_config.as_dict()) 20 | table = self._catalog.load_table(table_name) 21 | scan_keys = { 22 | "row_filter", 23 | "selected_fields", 24 | "case_sensitive", 25 | "snapshot_id", 26 | "options", 27 | "limit", 28 | } 29 | scan_config = {k: source_config[k] for k in scan_keys if k in source_config} 30 | return table.scan(**scan_config).to_arrow() 31 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_date_spine.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | run_dbt, 5 | ) 6 | 7 | my_date_spine_model = """ 8 | {{ 9 | config( 10 | materialized = 'table', 11 | ) 12 | }} 13 | 14 | with days as ( 15 | 16 | {{ 17 | dbt_utils.date_spine( 18 | 'day', 19 | "'2024-01-01'::timestamp", 20 | dbt.dateadd('day', 1, "'2024-02-01'::timestamp"), 21 | ) 22 | }} 23 | 24 | ), 25 | 26 | final as ( 27 | select cast(date_day as date) as date_day 28 | from days 29 | ) 30 | 31 | select * from final 32 | """ 33 | 34 | class TestDateSpine: 35 | @pytest.fixture(scope="class") 36 | def models(self): 37 | return { 38 | "date_spine.sql": my_date_spine_model, 39 | } 40 | 41 | @pytest.fixture(scope="class") 42 | def packages(self): 43 | return {"packages": [{"package": "dbt-labs/dbt_utils", "version": "1.1.1"}]} 44 | 45 | def test_date_spine(self, project): 46 | 47 | # install dbt_utils 48 | run_dbt(["deps"]) 49 | # run command 50 | results = run_dbt() 51 | # run result length 52 | assert len(results) == 1 53 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/validation_helper.sql: -------------------------------------------------------------------------------- 1 | {%- macro validate_string_field(field_value, field_name, errors) -%} 2 | {%- if field_value is not none and field_value is not string -%} 3 | {%- do errors.append(field_name ~ " must be a string, found: " ~ field_value) -%} 4 | {%- endif -%} 5 | {%- endmacro -%} 6 | 7 | {%- macro validate_string_list_field(field_value, field_name, errors) -%} 8 | {%- if field_value is not none -%} 9 | {%- if field_value is not sequence or 
field_value is mapping or field_value is string -%} 10 | {%- do errors.append(field_name ~ " must be a list") -%} 11 | {%- else -%} 12 | {%- for item in field_value -%} 13 | {%- if item is not string -%} 14 | {%- do errors.append(field_name ~ " must contain only string values, found: " ~ item) -%} 15 | {%- endif -%} 16 | {%- endfor -%} 17 | {%- endif -%} 18 | {%- endif -%} 19 | {%- endmacro -%} 20 | 21 | {%- macro validate_dict_field(field_value, field_name, errors) -%} 22 | {%- if field_value is not none and field_value is not mapping -%} 23 | {%- do errors.append(field_name ~ " must be a dictionary, found: " ~ field_value) -%} 24 | {%- endif -%} 25 | {%- endmacro -%} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | logs/ 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | test.env 50 | .mypy_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | #Emacs 69 | *~ 70 | 71 | # Sublime Text 72 | *.sublime-* 73 | 74 | # Vim 75 | *.sw* 76 | 77 | .python-version 78 | 79 | .DS_Store 80 | .idea/ 81 | .vscode/ 82 | .env 83 | 84 | .venv 85 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_unit_testing.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes 4 | from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity 5 | from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput 6 | 7 | 8 | @pytest.mark.skip_profile("buenavista") 9 | class TestUnitTestingTypesDuckDB(BaseUnitTestingTypes): 10 | @pytest.fixture 11 | def data_types(self): 12 | # sql_value, yaml_value 13 | return [ 14 | ["1", "1"], 15 | ["2.0", "2.0"], 16 | ["'12345'", "12345"], 17 | ["'string'", "string"], 18 | ["true", "true"], 19 | ["DATE '2020-01-02'", "2020-01-02"], 20 | ["TIMESTAMP '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"], 21 | ["'2013-11-03 00:00:00-0'::TIMESTAMPTZ", "2013-11-03 00:00:00-0"], 22 | [ 23 | "{'Alberta':'Edmonton','Manitoba':'Winnipeg'}", 24 | "{'Alberta':'Edmonton','Manitoba':'Winnipeg'}", 25 | ], 26 | ["ARRAY['a','b','c']", "['a','b','c']"], 27 | ["ARRAY[1,2,3]", "[1, 2, 3]"], 28 | ] 29 | 30 | 31 | class TestUnitTestCaseInsensitivityDuckDB(BaseUnitTestCaseInsensivity): 32 | pass 33 | 34 | 35 | class TestUnitTestInvalidInputDuckDB(BaseUnitTestInvalidInput): 36 
| pass 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # For more on configuring pre-commit hooks (see https://pre-commit.com/) 2 | 3 | # TODO: remove global exclusion of tests when testing overhaul is complete 4 | exclude: "^tests/.*" 5 | 6 | 7 | default_language_version: 8 | python: python3.11 9 | 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v3.2.0 13 | hooks: 14 | - id: check-yaml 15 | args: [--unsafe] 16 | - id: check-json 17 | exclude: ^.devcontainer/ 18 | - id: end-of-file-fixer 19 | - id: trailing-whitespace 20 | - id: check-case-conflict 21 | - repo: https://github.com/asottile/reorder_python_imports 22 | rev: v3.9.0 23 | hooks: 24 | - id: reorder-python-imports 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | # Ruff version. 27 | rev: v0.1.7 28 | hooks: 29 | # Run the linter. 30 | - id: ruff 31 | args: 32 | - "--line-length=99" 33 | - "--fix" 34 | # Run the formatter. 35 | - id: ruff-format 36 | args: 37 | - "--line-length=99" 38 | - repo: https://github.com/pre-commit/mirrors-mypy 39 | rev: v0.782 40 | hooks: 41 | - id: mypy 42 | args: [--show-error-codes, --ignore-missing-imports] 43 | files: ^dbt/adapters/.* 44 | language: system 45 | - id: mypy 46 | alias: mypy-check 47 | stages: [manual] 48 | args: [--show-error-codes, --pretty, --ignore-missing-imports] 49 | files: ^dbt/adapters 50 | language: system 51 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/python-3/.devcontainer/base.Dockerfile 2 | 3 | # [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster 4 | ARG VARIANT="3.10-bullseye" 5 | FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} 6 | 7 | # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 8 | ARG NODE_VERSION="none" 9 | RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi 10 | 11 | # [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. 12 | # COPY requirements.txt /tmp/pip-tmp/ 13 | # RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ 14 | # && rm -rf /tmp/pip-tmp 15 | 16 | # [Optional] Uncomment this section to install additional OS packages. 17 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 18 | # && apt-get -y install --no-install-recommends 19 | 20 | # [Optional] Uncomment this line to install global node packages. 21 | # RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 22 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/delta.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | from deltalake import DeltaTable 5 | 6 | from . 
import BasePlugin 7 | from ..utils import SourceConfig 8 | 9 | 10 | class Plugin(BasePlugin): 11 | def initialize(self, config: Dict[str, Any]): 12 | pass 13 | 14 | def configure_cursor(self, cursor): 15 | pass 16 | 17 | def load(self, source_config: SourceConfig): 18 | if "delta_table_path" not in source_config: 19 | raise Exception("'delta_table_path' is a required argument for the delta table!") 20 | 21 | table_path = source_config["delta_table_path"] 22 | storage_options = source_config.get("storage_options", None) 23 | 24 | if storage_options: 25 | dt = DeltaTable(table_path, storage_options=storage_options) 26 | else: 27 | dt = DeltaTable(table_path) 28 | 29 | # delta attributes 30 | as_of_version = source_config.get("as_of_version", None) 31 | as_of_datetime = source_config.get("as_of_datetime", None) 32 | 33 | if as_of_version: 34 | dt.load_as_version(as_of_version) 35 | 36 | if as_of_datetime: 37 | dt.load_as_version(as_of_datetime) 38 | 39 | df = dt.to_pyarrow_dataset() 40 | 41 | return df 42 | 43 | def default_materialization(self): 44 | return "view" 45 | 46 | 47 | # Future 48 | # TODO add databricks catalog 49 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = dbt-duckdb 3 | author = Josh Wills 4 | author_email = joshwills+dbt@gmail.com 5 | url = https://github.com/jwills/dbt-duckdb 6 | summary = The duckdb adapter plugin for dbt (data build tool) 7 | description_file = README.md 8 | long_description_content_type = text/markdown 9 | license = Apache-2 10 | classifier = 11 | Development Status :: 5 - Production/Stable 12 | License :: OSI Approved :: Apache Software License 13 | Operating System :: Microsoft :: Windows 14 | Operating System :: MacOS :: MacOS X 15 | Operating System :: POSIX :: Linux 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | Programming Language :: Python :: 3.12 19 | Programming Language :: Python :: 3.13 20 | keywords = 21 | setup 22 | distutils 23 | 24 | [options] 25 | install_requires= 26 | dbt-common>=1,<2 27 | dbt-adapters>=1,<2 28 | duckdb>=1.0.0 29 | # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency 30 | dbt-core>=1.8.0 31 | python_requires = >=3.10 32 | include_package_data = True 33 | packages = find_namespace: 34 | 35 | [options.packages.find] 36 | include = 37 | dbt 38 | dbt.* 39 | 40 | [build-system] 41 | requires = ["setuptools >= 61.2", "pbr>=1.9"] 42 | 43 | [extras] 44 | glue = 45 | boto3 46 | mypy-boto3-glue 47 | md = 48 | duckdb==1.4.2 49 | 50 | [files] 51 | packages = 52 | dbt-duckdb 53 | -------------------------------------------------------------------------------- /tests/unit/test_retries_connect.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch 3 | 4 | from duckdb import IOException 5 | 6 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 7 | from dbt.adapters.duckdb.credentials import Retries 8 | from dbt.adapters.duckdb.environments import Environment 9 | 10 | class TestConnectRetries: 11 | 12 | @pytest.fixture 13 | def creds(self): 14 | # Create a mock credentials object 15 | return DuckDBCredentials( 16 | path="foo.db", 17 | retries=Retries(connect_attempts=2, retryable_exceptions=["IOException", "ArithmeticError"]) 18 | ) 19 | 20 | @pytest.mark.parametrize("exception", [None, 
IOException, ArithmeticError, ValueError]) 21 | def test_initialize_db(self, creds, exception): 22 | # Mocking the duckdb.connect method 23 | with patch('duckdb.connect') as mock_connect: 24 | if exception: 25 | mock_connect.side_effect = [exception, None] 26 | 27 | if exception == ValueError: 28 | with pytest.raises(ValueError) as excinfo: 29 | Environment.initialize_db(creds) 30 | else: 31 | # Call the initialize_db method 32 | Environment.initialize_db(creds) 33 | if exception in {IOException, ArithmeticError}: 34 | assert mock_connect.call_count == creds.retries.connect_attempts 35 | else: 36 | mock_connect.assert_called_once_with(creds.path, read_only=False, config={}) 37 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/catalog.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro duckdb__get_catalog(information_schema, schemas) -%} 3 | {%- call statement('catalog', fetch_result=True) -%} 4 | with relations AS ( 5 | select 6 | t.table_name 7 | , t.database_name 8 | , t.schema_name 9 | , 'BASE TABLE' as table_type 10 | , t.comment as table_comment 11 | from duckdb_tables() t 12 | WHERE t.database_name = '{{ database }}' 13 | UNION ALL 14 | SELECT v.view_name as table_name 15 | , v.database_name 16 | , v.schema_name 17 | , 'VIEW' as table_type 18 | , v.comment as table_comment 19 | from duckdb_views() v 20 | WHERE v.database_name = '{{ database }}' 21 | ) 22 | select 23 | '{{ database }}' as table_database, 24 | r.schema_name as table_schema, 25 | r.table_name, 26 | r.table_type, 27 | r.table_comment, 28 | c.column_name, 29 | c.column_index as column_index, 30 | c.data_type as column_type, 31 | c.comment as column_comment, 32 | NULL as table_owner 33 | FROM relations r JOIN duckdb_columns() c ON r.schema_name = c.schema_name AND r.table_name = c.table_name 34 | WHERE ( 35 | {%- for schema in schemas -%} 36 | upper(r.schema_name) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} 37 | {%- endfor -%} 38 | ) 39 | ORDER BY 40 | r.schema_name, 41 | r.table_name, 42 | c.column_index 43 | {%- endcall -%} 44 | {{ return(load_result('catalog').table) }} 45 | {%- endmacro %} 46 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/environments/motherduck.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from .. 
import credentials 4 | from .local import DuckDBConnectionWrapper 5 | from .local import LocalEnvironment 6 | from dbt.adapters.contracts.connection import AdapterResponse 7 | 8 | 9 | MOTHERDUCK_SAAS_MODE_QUERY = """ 10 | SELECT value FROM duckdb_settings() WHERE name = 'motherduck_saas_mode' 11 | """ 12 | 13 | 14 | class MotherDuckEnvironment(LocalEnvironment): 15 | def __init__(self, credentials: credentials.DuckDBCredentials): 16 | self._motherduck_saas_mode: Optional[bool] = None 17 | super().__init__(credentials) 18 | 19 | def motherduck_saas_mode(self, handle: DuckDBConnectionWrapper): 20 | # Return cached value 21 | if self._motherduck_saas_mode is True: 22 | return True 23 | # Get SaaS mode from DuckDB config 24 | con = handle.cursor() 25 | (motherduck_saas_mode,) = con.sql(MOTHERDUCK_SAAS_MODE_QUERY).fetchone() 26 | if str(motherduck_saas_mode).lower() in ["1", "true"]: 27 | self._motherduck_saas_mode = True 28 | return True 29 | return False 30 | 31 | def submit_python_job(self, handle, parsed_model: dict, compiled_code: str) -> AdapterResponse: 32 | # Block local file access if SaaS mode is on 33 | if self.motherduck_saas_mode(handle) is True: 34 | raise RuntimeError("Python models are disabled when MotherDuck SaaS Mode is on.") 35 | return super().submit_python_job( 36 | handle=handle, parsed_model=parsed_model, compiled_code=compiled_code 37 | ) 38 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/delete_insert.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__get_delete_insert_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%} 2 | 3 | {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} 4 | 5 | {% if unique_key %} 6 | {% if unique_key is sequence and unique_key is not string %} 7 | delete from {{target }} as DBT_INCREMENTAL_TARGET 8 | using {{ source }} 9 | where ( 10 | {% for key in unique_key %} 11 | {{ source }}.{{ key }} = DBT_INCREMENTAL_TARGET.{{ key }} 12 | {{ "and " if not loop.last}} 13 | {% endfor %} 14 | {% if incremental_predicates %} 15 | {% for predicate in incremental_predicates %} 16 | and {{ predicate }} 17 | {% endfor %} 18 | {% endif %} 19 | ); 20 | {% else %} 21 | delete from {{ target }} 22 | where ( 23 | {{ unique_key }}) in ( 24 | select ({{ unique_key }}) 25 | from {{ source }} 26 | ) 27 | {%- if incremental_predicates %} 28 | {% for predicate in incremental_predicates %} 29 | and {{ predicate }} 30 | {% endfor %} 31 | {%- endif -%}; 32 | 33 | {% endif %} 34 | {% endif %} 35 | 36 | insert into {{ target }} ({{ dest_cols_csv }}) 37 | ( 38 | select {{ dest_cols_csv }} 39 | from {{ source }} 40 | ) 41 | 42 | {%- endmacro %} 43 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/sqlalchemy.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | from sqlalchemy import text 7 | 8 | from . import BasePlugin 9 | from . 
import pd_utils 10 | from ..utils import SourceConfig 11 | from ..utils import TargetConfig 12 | 13 | 14 | class Plugin(BasePlugin): 15 | def initialize(self, plugin_config: Dict[str, Any]): 16 | self.engine = create_engine(plugin_config.pop("connection_url"), **plugin_config) 17 | 18 | def load(self, source_config: SourceConfig) -> pd.DataFrame: 19 | if "query" in source_config: 20 | query = source_config["query"] 21 | query = query.format(**source_config.as_dict()) 22 | params = source_config.get("params", {}) 23 | with self.engine.connect() as conn: 24 | return pd.read_sql_query(text(query), con=conn, params=params) 25 | else: 26 | if "table" in source_config: 27 | table = source_config["table"] 28 | else: 29 | table = source_config.table_name() 30 | with self.engine.connect() as conn: 31 | return pd.read_sql_table(table, con=conn) 32 | 33 | def store(self, target_config: TargetConfig): 34 | # first, load the data frame from the external location 35 | df = pd_utils.target_to_df(target_config) 36 | table_name = target_config.relation.identifier 37 | # then, write it to the database 38 | df.to_sql(table_name, self.engine, if_exists="replace", index=False) 39 | 40 | def __del__(self): 41 | self.engine.dispose() 42 | self.engine = None 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 1.4.0 (2023-02-14) 2 | ------------------ 3 | 4 | - Added support for DuckDB 0.7.x and the ability to `ATTACH` additional databases 5 | 6 | 1.3.2 (2022-11-16) 7 | ------------------ 8 | 9 | - Added support for DuckDB 0.6.x 10 | 11 | 1.3.1 (2022-11-07) 12 | ------------------ 13 | 14 | - Support for Python models in dbt-duckdb 15 | - Support for the `external` materialization type 16 | 17 | 1.2.3 (2022-10-24) 18 | ------------------ 19 | 20 | - Added the `settings` dictionary for configuring arbitrary settings in the DuckDB 21 | instance used during the dbt run 22 | 23 | 1.2.2 (2022-10-05) 24 | ------------------ 25 | 26 | - Fixed a small bug in the multithreading implementation 27 | 28 | 1.2.1 (2022-10-03) 29 | ------------------ 30 | 31 | - Added support for multi-threaded dbt-duckdb runs 32 | 33 | 1.2.0 (2022-09-26) 34 | ------------------ 35 | 36 | - Support for loading DuckDB extensions 37 | - Support for reading/writing from S3 via the aforementioned extensions 38 | 39 | 1.1.4 (2022-07-06) 40 | ------------------ 41 | 42 | - Enforces the single-thread limit on the dbt-duckdb profile 43 | 44 | 1.1.3 (2022-06-29) 45 | ------------------ 46 | 47 | - Fixes DuckDB 0.4.0 compatibility issue 48 | 49 | 1.1.2 (2022-06-29) 50 | ------------------ 51 | 52 | - Align with minor version of dbt-core 53 | - Constrain range of compatible duckdb versions 54 | 55 | 1.1.1 (2022-04-06) 56 | ------------------ 57 | 58 | - Fix typo in package description 59 | 60 | 1.1.0 (2022-04-06) 61 | ------------------ 62 | 63 | - Upgraded to DuckDB 0.3.2 64 | - Refactored adapter so that dbt threads > 1 work with DuckDB 65 | 66 | 1.0.0 (2022-01-10) 67 | ------------------ 68 | 69 | - Upgraded to DuckDB 0.3.1 70 | - First basically working version 71 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_glue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.adapter.basic.files import ( 4 | base_table_sql, 5 | model_base, 6 | schema_base_yml, 7 | seeds_base_csv, 8 
| ) 9 | from dbt.tests.util import ( 10 | run_dbt, 11 | ) 12 | 13 | config_materialized_glue = """ 14 | {{ config(materialized="external", glue_register=true, glue_database='db2') }} 15 | """ 16 | default_glue_sql = config_materialized_glue + model_base 17 | 18 | 19 | @pytest.mark.skip 20 | class TestGlueMaterializations: 21 | @pytest.fixture(scope="class") 22 | def models(self): 23 | return { 24 | "table_model.sql": base_table_sql, 25 | "table_default.sql": default_glue_sql, 26 | "schema.yml": schema_base_yml, 27 | } 28 | 29 | @pytest.fixture(scope="class") 30 | def seeds(self): 31 | return { 32 | "base.csv": seeds_base_csv, 33 | } 34 | 35 | @pytest.fixture(scope="class") 36 | def dbt_profile_target(self, dbt_profile_target): 37 | dbt_profile_target["external_root"] = "s3://duckdbtest/glue_test" 38 | dbt_profile_target["extensions"] = ["httpfs"] 39 | dbt_profile_target["settings"] = { 40 | "s3_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), 41 | "s3_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), 42 | "s3_region": "us-west-2", 43 | } 44 | return dbt_profile_target 45 | 46 | def test_base(self, project): 47 | # seed command 48 | results = run_dbt(["seed"]) 49 | # seed result length 50 | assert len(results) == 1 51 | 52 | # run command 53 | results = run_dbt() 54 | # run result length 55 | assert len(results) == 2 56 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/persist_docs.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | The logic in this file is adapted from dbt-postgres, since DuckDB matches 4 | the Postgres relation/column commenting model as of 0.10.1 5 | #} 6 | 7 | {# 8 | By using dollar-quoting like this, users can embed anything they want into their comments 9 | (including nested dollar-quoting), as long as they do not use this exact dollar-quoting 10 | label. It would be nice to just pick a new one but eventually you do have to give up. 
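As a worked example (description text assumed), a column description such as
    Net revenue in US$ (see the $finance$ glossary)
is emitted by duckdb_escape_comment below as
    $dbt_comment_literal_block$Net revenue in US$ (see the $finance$ glossary)$dbt_comment_literal_block$
so stray dollar signs and ad-hoc $...$ tags pass through unchanged; only a comment that
itself contains the literal label $dbt_comment_literal_block$ is rejected.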
11 | #} 12 | {% macro duckdb_escape_comment(comment) -%} 13 | {% if comment is not string %} 14 | {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %} 15 | {% endif %} 16 | {%- set magic = '$dbt_comment_literal_block$' -%} 17 | {%- if magic in comment -%} 18 | {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%} 19 | {%- endif -%} 20 | {{ magic }}{{ comment }}{{ magic }} 21 | {%- endmacro %} 22 | 23 | {% macro duckdb__alter_relation_comment(relation, comment) %} 24 | {% set escaped_comment = duckdb_escape_comment(comment) %} 25 | comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }}; 26 | {% endmacro %} 27 | 28 | 29 | {% macro duckdb__alter_column_comment(relation, column_dict) %} 30 | {% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list %} 31 | {% for column_name in column_dict if (column_name in existing_columns) %} 32 | {% set comment = column_dict[column_name]['description'] %} 33 | {% set escaped_comment = duckdb_escape_comment(comment) %} 34 | comment on column {{ relation }}.{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} is {{ escaped_comment }}; 35 | {% endfor %} 36 | {% endmacro %} 37 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_community_extensions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import ( 3 | check_relation_types, 4 | check_relations_equal, 5 | check_result_nodes_by_name, 6 | relation_from_name, 7 | run_dbt, 8 | ) 9 | 10 | @pytest.mark.skip_profile("buenavista", "nightly", reason="Cannot install community extensions for nightly release") 11 | class BaseCommunityExtensions: 12 | 13 | @pytest.fixture(scope="class") 14 | def dbt_profile_target(self, dbt_profile_target): 15 | dbt_profile_target["extensions"] = [ 16 | {"name": "quack", "repo": "community"}, 17 | ] 18 | return dbt_profile_target 19 | 20 | @pytest.fixture(scope="class") 21 | def models(self): 22 | return { 23 | "quack_model.sql": "select quack('world') as quack_world", 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def project_config_update(self): 28 | return { 29 | "name": "base", 30 | } 31 | 32 | def test_base(self, project): 33 | 34 | # run command 35 | results = run_dbt() 36 | # run result length 37 | assert len(results) == 1 38 | 39 | # names exist in result nodes 40 | check_result_nodes_by_name( 41 | results, 42 | [ 43 | "quack_model", 44 | ], 45 | ) 46 | 47 | # check relation types 48 | expected = { 49 | "quack_model": "view", 50 | } 51 | check_relation_types(project.adapter, expected) 52 | 53 | @pytest.mark.skip_profile("nightly", reason="Cannot install community extensions for nightly release") 54 | @pytest.mark.skip_profile("buenavista") 55 | class TestCommunityExtensions(BaseCommunityExtensions): 56 | pass 57 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/fixtures.py: -------------------------------------------------------------------------------- 1 | models__incremental_sql = """ 2 | {{ 3 | config( 4 | materialized = "incremental", 5 | indexes=[ 6 | {'columns': ['column_a']}, 7 | {'columns': ['column_a', 'column_b'], 'unique': True}, 8 | ] 9 | ) 10 | }} 11 | 12 | select * 13 | from ( 14 | select 1 as column_a, 2 as column_b 15 | ) t 16 | 17 | {% if is_incremental() %} 18 | where column_a > (select 
max(column_a) from {{this}}) 19 | {% endif %} 20 | 21 | """ 22 | 23 | models__table_sql = """ 24 | {{ 25 | config( 26 | materialized = "table", 27 | indexes=[ 28 | {'columns': ['column_a']}, 29 | {'columns': ['column_b']}, 30 | {'columns': ['column_a', 'column_b']}, 31 | {'columns': ['column_b', 'column_a'], 'unique': True}, 32 | {'columns': ['column_a']} 33 | ] 34 | ) 35 | }} 36 | 37 | select 1 as column_a, 2 as column_b 38 | 39 | """ 40 | 41 | snapshots__colors_sql = """ 42 | {% snapshot colors %} 43 | 44 | {{ 45 | config( 46 | target_database=database, 47 | target_schema=schema, 48 | unique_key='id', 49 | strategy='check', 50 | check_cols=['color'], 51 | indexes=[ 52 | {'columns': ['id']}, 53 | {'columns': ['id', 'color'], 'unique': True}, 54 | ] 55 | ) 56 | }} 57 | 58 | {% if var('version') == 1 %} 59 | 60 | select 1 as id, 'red' as color union all 61 | select 2 as id, 'green' as color 62 | 63 | {% else %} 64 | 65 | select 1 as id, 'blue' as color union all 66 | select 2 as id, 'green' as color 67 | 68 | {% endif %} 69 | 70 | {% endsnapshot %} 71 | 72 | """ 73 | 74 | seeds__seed_csv = """country_code,country_name 75 | US,United States 76 | CA,Canada 77 | GB,United Kingdom 78 | """ 79 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_hooks.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import pytest 3 | from dbt.tests.util import run_dbt, relation_from_name 4 | 5 | basic_model_sql = """ 6 | select range from range(3) 7 | """ 8 | 9 | test_table = f"test_table_{str(uuid.uuid1()).replace('-', '_')}" 10 | 11 | post_hook_sql = f"create table {test_table} as select 1;" 12 | 13 | 14 | class TestPostHook: 15 | """ 16 | Post hook should run inside txn 17 | """ 18 | 19 | @pytest.fixture(scope="class") 20 | def project_config_update(self): 21 | return { 22 | "name": "base", 23 | "models": {"post-hook": [{"sql": post_hook_sql}]}, 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def models(self): 28 | return { 29 | "basic_model.sql": basic_model_sql, 30 | } 31 | 32 | def test_run(self, project): 33 | run_dbt(["run"]) 34 | 35 | # check that the model was run 36 | relation = relation_from_name(project.adapter, "basic_model") 37 | result = project.run_sql( 38 | f"select count(*) as num_rows from {relation}", fetch="one" 39 | ) 40 | assert result[0] == 3 41 | 42 | # check that the post hook was run 43 | result = project.run_sql( 44 | f"select count(*) as num_rows from {test_table}", fetch="one" 45 | ) 46 | assert result[0] == 1 47 | 48 | # reset 49 | project.run_sql(f"drop table {test_table}") 50 | 51 | 52 | class TestPostHookTransactionFalse(TestPostHook): 53 | """ 54 | Post hook should run outside txn 55 | """ 56 | 57 | @pytest.fixture(scope="class") 58 | def project_config_update(self): 59 | return { 60 | "name": "base", 61 | "models": {"post-hook": [{"sql": post_hook_sql, "transaction": False}]}, 62 | } 63 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/snapshot_helper.sql: -------------------------------------------------------------------------------- 1 | {% macro duckdb__snapshot_merge_sql(target, source, insert_cols) -%} 2 | {%- set insert_cols_csv = insert_cols | join(', ') -%} 3 | 4 | {%- set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%} 5 | 6 | update {{ target }} as DBT_INTERNAL_TARGET 7 | set {{ columns.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ 
columns.dbt_valid_to }} 8 | from {{ source }} as DBT_INTERNAL_SOURCE 9 | where DBT_INTERNAL_SOURCE.{{ columns.dbt_scd_id }}::text = DBT_INTERNAL_TARGET.{{ columns.dbt_scd_id }}::text 10 | and DBT_INTERNAL_SOURCE.dbt_change_type::text in ('update'::text, 'delete'::text) 11 | {% if config.get("dbt_valid_to_current") %} 12 | and (DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} = {{ config.get('dbt_valid_to_current') }} or DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} is null); 13 | {% else %} 14 | and DBT_INTERNAL_TARGET.{{ columns.dbt_valid_to }} is null; 15 | {% endif %} 16 | 17 | insert into {{ target }} ({{ insert_cols_csv }}) 18 | select {% for column in insert_cols -%} 19 | DBT_INTERNAL_SOURCE.{{ column }} {%- if not loop.last %}, {%- endif %} 20 | {%- endfor %} 21 | from {{ source }} as DBT_INTERNAL_SOURCE 22 | where DBT_INTERNAL_SOURCE.dbt_change_type::text = 'insert'::text; 23 | 24 | {% endmacro %} 25 | 26 | {% macro build_snapshot_staging_table(strategy, sql, target_relation) %} 27 | {% set temp_relation = make_temp_relation(target_relation) %} 28 | 29 | {% set select = snapshot_staging_table(strategy, sql, target_relation) %} 30 | 31 | {% call statement('build_snapshot_staging_relation') %} 32 | {{ create_table_as(False, temp_relation, select) }} 33 | {% endcall %} 34 | 35 | {% do return(temp_relation) %} 36 | {% endmacro %} 37 | 38 | {% macro duckdb__post_snapshot(staging_relation) %} 39 | {% do return(drop_relation(staging_relation)) %} 40 | {% endmacro %} 41 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/utils/upstream.sql: -------------------------------------------------------------------------------- 1 | {%- macro register_upstream_external_models() -%} 2 | {% if execute %} 3 | {% set upstream_nodes = {} %} 4 | {% set upstream_schemas = {} %} 5 | {% for node in selected_resources %} 6 | {% if node not in graph['nodes'] %}{% continue %}{% endif %} 7 | {% for upstream_node in graph['nodes'][node]['depends_on']['nodes'] %} 8 | {% if upstream_node not in upstream_nodes and upstream_node not in selected_resources %} 9 | {% do upstream_nodes.update({upstream_node: None}) %} 10 | {% set upstream = graph['nodes'].get(upstream_node) %} 11 | {% if upstream 12 | and upstream.resource_type in ('model', 'seed') 13 | and upstream.config.materialized=='external' 14 | %} 15 | {%- set upstream_rel = api.Relation.create( 16 | database=upstream['database'], 17 | schema=upstream['schema'], 18 | identifier=upstream['alias'] 19 | ) -%} 20 | {%- set location = upstream.config.get('location', external_location(upstream_rel, upstream.config)) -%} 21 | {%- set rendered_options = render_write_options(upstream.config) -%} 22 | {%- set upstream_location = adapter.external_read_location(location, rendered_options) -%} 23 | {% if upstream_rel.schema not in upstream_schemas %} 24 | {% call statement('main', language='sql') -%} 25 | create schema if not exists {{ upstream_rel.without_identifier() }} 26 | {%- endcall %} 27 | {% do upstream_schemas.update({upstream_rel.schema: None}) %} 28 | {% endif %} 29 | {% call statement('main', language='sql') -%} 30 | create or replace view {{ upstream_rel }} as ( 31 | select * from '{{ upstream_location }}' 32 | ); 33 | {%- endcall %} 34 | {%- endif %} 35 | {% endif %} 36 | {% endfor %} 37 | {% endfor %} 38 | {% if upstream_schemas %} 39 | {% do adapter.commit() %} 40 | {% endif %} 41 | {% endif %} 42 | {%- endmacro -%} 43 | -------------------------------------------------------------------------------- 
/tests/functional/plugins/test_sqlite.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlite3 3 | from pathlib import Path 4 | from dbt.tests.util import ( 5 | run_dbt, 6 | ) 7 | 8 | model_sql = """ 9 | {{ config(materialized='incremental', database='satest') }} 10 | select * from satest.tt1 11 | """ 12 | 13 | 14 | class TestSQLitePlugin: 15 | 16 | @pytest.fixture(scope="class") 17 | def sqlite_test_db(self): 18 | path = '/tmp/satest.db' 19 | Path(path).unlink(missing_ok=True) 20 | db = sqlite3.connect(path) 21 | cursor = db.cursor() 22 | cursor.execute("CREATE TABLE tt1 (id int, name text)") 23 | cursor.execute("INSERT INTO tt1 VALUES (1, 'John Doe')") 24 | cursor.execute("INSERT INTO tt1 VALUES (2, 'Jane Smith')") 25 | cursor.execute("CREATE TABLE test_table2 (a int, b int, c int)") 26 | cursor.execute("INSERT INTO test_table2 VALUES (1, 2, 3), (4, 5, 6)") 27 | cursor.close() 28 | db.commit() 29 | db.close() 30 | 31 | yield path 32 | 33 | @pytest.fixture(scope="class") 34 | def profiles_config_update(self, dbt_profile_target, sqlite_test_db): 35 | return { 36 | "test": { 37 | "outputs": { 38 | "dev": { 39 | "type": "duckdb", 40 | "path": dbt_profile_target.get("path", ":memory:"), 41 | "attach": [ 42 | {'path': sqlite_test_db} 43 | ] 44 | } 45 | }, 46 | "target": "dev", 47 | } 48 | } 49 | 50 | @pytest.fixture(scope="class") 51 | def models(self, test_data_path): 52 | return { 53 | "read_write.sql": model_sql, 54 | 55 | } 56 | 57 | def test_sqlite_plugin(self, project): 58 | results = run_dbt() 59 | assert len(results) == 1 60 | 61 | res = project.run_sql("SELECT COUNT(1) FROM satest.read_write", fetch="one") 62 | assert res[0] == 2 63 | 64 | 65 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/seed.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro duckdb__get_binding_char() %} 3 | {{ return(adapter.get_binding_char()) }} 4 | {% endmacro %} 5 | 6 | {% macro duckdb__get_batch_size() %} 7 | {{ return(10000) }} 8 | {% endmacro %} 9 | 10 | {% macro duckdb__load_csv_rows(model, agate_table) %} 11 | {% if config.get('fast', true) %} 12 | {% set seed_file_path = adapter.get_seed_file_path(model) %} 13 | {% set delimiter = config.get('delimiter', ',') %} 14 | {% set sql %} 15 | COPY {{ this.render() }} FROM '{{ seed_file_path }}' (FORMAT CSV, HEADER TRUE, DELIMITER '{{ delimiter }}') 16 | {% endset %} 17 | {% do adapter.add_query(sql, abridge_sql_log=True) %} 18 | {{ return(sql) }} 19 | {% endif %} 20 | 21 | {% set batch_size = get_batch_size() %} 22 | {% set agate_table = adapter.convert_datetimes_to_strs(agate_table) %} 23 | {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} 24 | {% set bindings = [] %} 25 | 26 | {% set statements = [] %} 27 | 28 | {% for chunk in agate_table.rows | batch(batch_size) %} 29 | {% set bindings = [] %} 30 | 31 | {% for row in chunk %} 32 | {% do bindings.extend(row) %} 33 | {% endfor %} 34 | 35 | {% set sql %} 36 | insert into {{ this.render() }} ({{ cols_sql }}) values 37 | {% for row in chunk -%} 38 | ({%- for column in agate_table.column_names -%} 39 | {{ get_binding_char() }} 40 | {%- if not loop.last%},{%- endif %} 41 | {%- endfor -%}) 42 | {%- if not loop.last%},{%- endif %} 43 | {%- endfor %} 44 | {% endset %} 45 | 46 | {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} 47 | 48 | {% if loop.index0 == 0 %} 49 | {% do 
statements.append(sql) %} 50 | {% endif %} 51 | {% endfor %} 52 | 53 | {# Return SQL so we can render it out into the compiled files #} 54 | {{ return(statements[0]) }} 55 | {% endmacro %} 56 | -------------------------------------------------------------------------------- /tests/bv_test_server.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import tempfile 4 | 5 | from buenavista.backends.duckdb import DuckDBConnection 6 | from buenavista.core import BVType, Extension, Session, QueryResult, SimpleQueryResult 7 | from buenavista.postgres import BuenaVistaServer 8 | 9 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 10 | from dbt.adapters.duckdb.environments import Environment 11 | 12 | 13 | class TestPythonRunner(Extension): 14 | def type(self) -> str: 15 | return "dbt_python_job" 16 | 17 | def apply(self, params: dict, handle: Session) -> QueryResult: 18 | mod_file = tempfile.NamedTemporaryFile(suffix=".py", delete=False) 19 | mod_file.write(params["module_definition"].lstrip().encode("utf-8")) 20 | mod_file.close() 21 | try: 22 | spec = importlib.util.spec_from_file_location( 23 | params["module_name"], 24 | mod_file.name, 25 | ) 26 | if not spec: 27 | raise Exception("Failed to load python model as module") 28 | module = importlib.util.module_from_spec(spec) 29 | if spec.loader: 30 | spec.loader.exec_module(module) 31 | else: 32 | raise Exception("Module spec did not include a loader") 33 | # Do the actual work to run the code here 34 | cursor = handle.cursor() 35 | dbt = module.dbtObj(handle.load_df_function) 36 | df = module.model(dbt, cursor) 37 | module.materialize(df, cursor) 38 | return SimpleQueryResult("msg", "Success", BVType.TEXT) 39 | finally: 40 | os.unlink(mod_file.name) 41 | 42 | 43 | def create(): 44 | config = {"path": ":memory:", "type": "duckdb"} 45 | creds = DuckDBCredentials.from_dict(config) 46 | db = Environment.initialize_db(creds) 47 | conn = DuckDBConnection(db) 48 | server = BuenaVistaServer( 49 | ("localhost", 5433), conn, extensions=[TestPythonRunner()] 50 | ) 51 | return server 52 | 53 | 54 | if __name__ == "__main__": 55 | server = create() 56 | server.serve_forever() 57 | -------------------------------------------------------------------------------- /tests/functional/adapter/simple_seed/test_fast_seed.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.simple_seed.test_seed import SeedTestBase 4 | from dbt.tests.adapter.simple_seed.test_seed import SeedUniqueDelimiterTestBase 5 | from dbt.tests.util import ( 6 | run_dbt, 7 | ) 8 | 9 | class TestSeedConfigFast(SeedTestBase): 10 | @pytest.fixture(scope="class") 11 | def project_config_update(self): 12 | return { 13 | "seeds": {"quote_columns": False, "fast": True} 14 | } 15 | 16 | def test_simple_seed_fast(self, project): 17 | self._build_relations_for_test(project) 18 | self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) 19 | 20 | 21 | class TestSeedWithUniqueDelimiter(SeedUniqueDelimiterTestBase): 22 | def test_seed_with_unique_delimiter(self, project): 23 | """Testing correct run of seeds with a unique delimiter (pipe in this case)""" 24 | self._build_relations_for_test(project) 25 | self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) 26 | 27 | 28 | class TestSeedWithWrongDelimiter(SeedUniqueDelimiterTestBase): 29 | @pytest.fixture(scope="class") 30 | 
def project_config_update(self): 31 | return { 32 | "seeds": {"quote_columns": False, "delimiter": ";"}, 33 | } 34 | 35 | def test_seed_with_wrong_delimiter(self, project): 36 | """Testing failure of running dbt seed with a wrongly configured delimiter""" 37 | seed_result = run_dbt(["seed"], expect_pass=False) 38 | assert "syntax error" in seed_result.results[0].message.lower() 39 | 40 | 41 | class TestSeedWithEmptyDelimiter(SeedUniqueDelimiterTestBase): 42 | @pytest.fixture(scope="class") 43 | def project_config_update(self): 44 | return { 45 | "seeds": {"quote_columns": False, "delimiter": ""}, 46 | } 47 | 48 | def test_seed_with_empty_delimiter(self, project): 49 | """Testing failure of running dbt seed with an empty configured delimiter value""" 50 | seed_result = run_dbt(["seed"], expect_pass=False) 51 | assert "compilation error" in seed_result.results[0].message.lower() 52 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_iceberg.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | check_relations_equal, 5 | run_dbt, 6 | ) 7 | 8 | sources_schema_yml = """ 9 | version: 2 10 | sources: 11 | - name: iceberg_source 12 | schema: main 13 | config: 14 | plugin: iceberg 15 | iceberg_table: "examples.{identifier}" 16 | tables: 17 | - name: nyc_taxi_locations 18 | """ 19 | 20 | models_source_model1_sql = """ 21 | select * from {{ source('iceberg_source', 'nyc_taxi_locations') }} 22 | """ 23 | 24 | 25 | # Skipping this b/c it requires using my (@jwills) personal creds 26 | # when testing it locally and also b/c I think there is something 27 | # wrong with profiles_config_update since it can't be used in multiple 28 | # tests in the same pytest session 29 | @pytest.mark.skip 30 | class TestIcebergPlugin: 31 | @pytest.fixture(scope="class") 32 | def profiles_config_update(self, dbt_profile_target): 33 | config = {"catalog": "default"} 34 | if "path" not in dbt_profile_target: 35 | return {} 36 | return { 37 | "test": { 38 | "outputs": { 39 | "dev": { 40 | "type": "duckdb", 41 | "path": dbt_profile_target["path"], 42 | "plugins": [ 43 | {"module": "iceberg", "config": config} 44 | ], 45 | } 46 | }, 47 | "target": "dev", 48 | } 49 | } 50 | 51 | @pytest.fixture(scope="class") 52 | def models(self): 53 | return { 54 | "schema.yml": sources_schema_yml, 55 | "source_model1.sql": models_source_model1_sql, 56 | } 57 | 58 | def test_iceberg_plugin(self, project): 59 | results = run_dbt() 60 | assert len(results) == 1 61 | 62 | res = project.run_sql("SELECT COUNT(1) FROM nyc_taxi_locations", fetch="one") 63 | assert res[0] == 265 64 | 65 | check_relations_equal( 66 | project.adapter, 67 | [ 68 | "nyc_taxi_locations", 69 | "source_model1", 70 | ], 71 | ) -------------------------------------------------------------------------------- /tests/functional/plugins/test_excel.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pytest 3 | 4 | from dbt.tests.util import ( 5 | check_relations_equal, 6 | run_dbt, 7 | ) 8 | 9 | schema_yml = """ 10 | version: 2 11 | sources: 12 | - name: excel_source 13 | schema: main 14 | meta: 15 | plugin: excel 16 | tables: 17 | - name: excel_file 18 | description: "An excel file" 19 | meta: 20 | external_location: "{test_data_path}/excel_file.xlsx" 21 | """ 22 | 23 | plugins = [ 24 | { 25 | "module": "excel", 26 | "config": { 27 | "output": { 28 | "engine": 
"openpyxl", 29 | "file": "/tmp/excel_file_out.xlsx", 30 | "lazy_close": False 31 | } 32 | } 33 | }, 34 | ] 35 | 36 | model_sql = """ 37 | {{ config(materialized='external', plugin='excel') }} 38 | select * from {{ source('excel_source', 'excel_file') }} 39 | """ 40 | 41 | 42 | class TestExcelPlugin: 43 | @pytest.fixture(scope="class") 44 | def profiles_config_update(self, dbt_profile_target): 45 | return { 46 | "test": { 47 | "outputs": { 48 | "dev": { 49 | "type": "duckdb", 50 | "path": dbt_profile_target.get("path", ":memory:"), 51 | "plugins": plugins, 52 | } 53 | }, 54 | "target": "dev", 55 | } 56 | } 57 | 58 | @pytest.fixture(scope="class") 59 | def models(self, test_data_path): 60 | return { 61 | "schema_excel.yml": schema_yml.format(test_data_path=test_data_path), 62 | "excel_read_write.sql": model_sql, 63 | } 64 | 65 | def test_excel_plugin(self, project): 66 | results = run_dbt() 67 | assert len(results) == 1 68 | 69 | res = project.run_sql("SELECT COUNT(1) FROM excel_file", fetch="one") 70 | assert res[0] == 9 71 | 72 | df = pandas.read_excel('/tmp/excel_file_out.xlsx') 73 | assert df.shape[0] == 9 74 | assert df['First Name'].iloc[0] == 'Dulce' 75 | 76 | check_relations_equal( 77 | project.adapter, 78 | [ 79 | "excel_file", 80 | "excel_read_write", 81 | ], 82 | ) 83 | 84 | 85 | -------------------------------------------------------------------------------- /tests/unit/test_retries_query.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock 3 | from unittest.mock import patch 4 | 5 | from duckdb import IOException 6 | 7 | from dbt.adapters.duckdb.credentials import Retries 8 | from dbt.adapters.duckdb.environments import RetryableCursor 9 | 10 | class TestRetryableCursor: 11 | 12 | @pytest.fixture 13 | def mock_cursor(self): 14 | return MagicMock() 15 | 16 | @pytest.fixture 17 | def mock_retries(self): 18 | return Retries(query_attempts=3) 19 | 20 | @pytest.fixture 21 | def retry_cursor(self, mock_cursor, mock_retries): 22 | return RetryableCursor( 23 | mock_cursor, 24 | mock_retries.query_attempts, 25 | mock_retries.retryable_exceptions) 26 | 27 | def test_successful_execute(self, mock_cursor, retry_cursor): 28 | """ Test that execute successfully runs the SQL query. """ 29 | sql_query = "SELECT * FROM table" 30 | retry_cursor.execute(sql_query) 31 | mock_cursor.execute.assert_called_once_with(sql_query) 32 | 33 | def test_retry_on_failure(self, mock_cursor, retry_cursor): 34 | """ Test that execute retries the SQL query on failure. """ 35 | mock_cursor.execute.side_effect = [IOException, None] 36 | sql_query = "SELECT * FROM table" 37 | retry_cursor.execute(sql_query) 38 | assert mock_cursor.execute.call_count == 2 39 | 40 | def test_no_retry_on_non_retryable_exception(self, mock_cursor, retry_cursor): 41 | """ Test that a non-retryable exception is not retried. """ 42 | mock_cursor.execute.side_effect = ValueError 43 | sql_query = "SELECT * FROM table" 44 | with pytest.raises(ValueError): 45 | retry_cursor.execute(sql_query) 46 | mock_cursor.execute.assert_called_once_with(sql_query) 47 | 48 | def test_exponential_backoff(self, mock_cursor, retry_cursor): 49 | """ Test that exponential backoff is applied between retries. 
""" 50 | mock_cursor.execute.side_effect = [IOException, IOException, None] 51 | sql_query = "SELECT * FROM table" 52 | 53 | with patch("time.sleep") as mock_sleep: 54 | retry_cursor.execute(sql_query) 55 | assert mock_sleep.call_count == 2 56 | mock_sleep.assert_any_call(1) 57 | mock_sleep.assert_any_call(2) 58 | -------------------------------------------------------------------------------- /tests/unit/test_get_column_schema.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from tests.unit.utils import config_from_parts_or_dicts 8 | 9 | 10 | class TestDuckDBAdapterGetColumnSchemaFromQuery(unittest.TestCase): 11 | def setUp(self): 12 | set_from_args(Namespace(STRICT_MODE=True), {}) 13 | 14 | profile_cfg = { 15 | "outputs": { 16 | "test": { 17 | "type": "duckdb", 18 | "path": ":memory:", 19 | } 20 | }, 21 | "target": "test", 22 | } 23 | 24 | project_cfg = { 25 | "name": "X", 26 | "version": "0.1", 27 | "profile": "test", 28 | "project-root": "/tmp/dbt/does-not-exist", 29 | "quoting": { 30 | "identifier": False, 31 | "schema": True, 32 | }, 33 | "config-version": 2, 34 | } 35 | 36 | self.config = config_from_parts_or_dicts(project_cfg, profile_cfg, cli_vars={}) 37 | self.mock_mp_context = mock.MagicMock() 38 | self._adapter = None 39 | 40 | @property 41 | def adapter(self): 42 | if self._adapter is None: 43 | self._adapter = DuckDBAdapter(self.config, self.mock_mp_context) 44 | return self._adapter 45 | 46 | def test_get_column_schema_from_query_with_struct(self): 47 | """Test get_column_schema_from_query flattens struct columns.""" 48 | mock_cursor = mock.MagicMock() 49 | mock_cursor.fetchall.return_value = [ 50 | ("id", "INTEGER"), 51 | ("user_data", "STRUCT(name VARCHAR, age INTEGER)") 52 | ] 53 | 54 | with mock.patch.object(self.adapter.connections, 'add_select_query', return_value=(None, mock_cursor)): 55 | result = self.adapter.get_column_schema_from_query("SELECT * FROM test_table") 56 | 57 | # Verify result contains flattened columns (1 simple + 2 from struct) 58 | self.assertEqual(len(result), 3) 59 | self.assertEqual(result[0].column, "id") 60 | self.assertEqual(result[1].column, "user_data.name") 61 | self.assertEqual(result[2].column, "user_data.age") 62 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = py{310,311,312,313} 4 | 5 | [testenv:{unit,py310,py311,py312,py313,py}] 6 | description = unit testing 7 | skip_install = True 8 | passenv = * 9 | commands = {envpython} -m pytest {posargs} tests/unit 10 | deps = 11 | -rdev-requirements.txt 12 | -e. 13 | 14 | [testenv:{functional,py310,py311,py312,py313,py}] 15 | description = adapter functional testing 16 | skip_install = True 17 | passenv = * 18 | commands = {envpython} -m pytest {posargs} tests/functional/adapter 19 | deps = 20 | -rdev-requirements.txt 21 | -e. 22 | 23 | [testenv:{filebased,py310,py311,py312,py313,py}] 24 | description = adapter functional testing using file-based DBs 25 | skip_install = True 26 | passenv = * 27 | commands = {envpython} -m pytest --profile=file {posargs} tests/functional/adapter 28 | deps = 29 | -rdev-requirements.txt 30 | -e. 
31 | 32 | [testenv:{buenavista,py310}] 33 | description = adapter functional testing using a Buena Vista server 34 | skip_install = True 35 | passenv = * 36 | commands = {envpython} -m pytest --profile=buenavista {posargs} tests/functional/adapter 37 | deps = 38 | -rdev-requirements.txt 39 | -e. 40 | 41 | [testenv:{md,py311}] 42 | description = adapter function testing using MotherDuck 43 | skip_install = True 44 | passenv = * 45 | commands = {envpython} -m pytest --profile=md --maxfail=2 {posargs} tests/functional/plugins/motherduck tests/functional/adapter 46 | deps = 47 | duckdb==1.4.2 48 | -rdev-requirements.txt 49 | -e.[md] 50 | 51 | [testenv:{fsspec,py310,py311,py312,py313,py}] 52 | description = adapter fsspec testing 53 | skip_install = True 54 | passenv = * 55 | commands = {envpython} -m pytest {posargs} tests/functional/fsspec 56 | deps = 57 | -rdev-requirements.txt 58 | -e. 59 | 60 | [testenv:{plugins,py310,py311,py312,py313,py}] 61 | description = adapter plugin testing 62 | skip_install = True 63 | passenv = * 64 | commands = {envpython} -m pytest {posargs} --profile=file tests/functional/plugins 65 | deps = 66 | duckdb==1.4.2 67 | -rdev-requirements.txt 68 | -e. 69 | 70 | [testenv:{nightly,py310,py311,py312,py313,py}] 71 | description = duckdb nightly release testing 72 | skip_install = True 73 | passenv = * 74 | commands = 75 | {envpython} -m pip install --upgrade --pre duckdb 76 | {envpython} -m pip show duckdb 77 | {envpython} -m pytest {posargs} --profile=nightly tests/unit tests/functional/adapter 78 | deps = 79 | -rdev-requirements.txt 80 | -e. 81 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_sources.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dbt.tests.util import run_dbt 5 | 6 | sources_schema_yml = """version: 2 7 | sources: 8 | - name: external_source 9 | config: 10 | external_location: "/tmp/{name}_{extra}.csv" 11 | tables: 12 | - name: seeds_source 13 | description: "A source table" 14 | config: 15 | extra: 'something' 16 | columns: 17 | - name: id 18 | description: "An id" 19 | tests: 20 | - unique 21 | - not_null 22 | - name: seeds_ost 23 | identifier: "seeds_other_source_table" 24 | config: 25 | external_location: "read_csv_auto('/tmp/%(identifier)s.csv')" 26 | formatter: oldstyle 27 | - name: seeds_other_source_table 28 | config: 29 | external_location: "read_csv_auto('/tmp/${name}.csv')" 30 | formatter: template 31 | """ 32 | 33 | models_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 34 | """ 35 | 36 | models_multi_source_model_sql = """select s.* from {{ source('external_source', 'seeds_source') }} s 37 | inner join {{ source('external_source', 'seeds_ost') }} oldstyle USING (id) 38 | inner join {{ source('external_source', 'seeds_other_source_table') }} tmpl USING (id) 39 | """ 40 | 41 | 42 | class TestExternalSources: 43 | @pytest.fixture(scope="class") 44 | def models(self): 45 | return { 46 | "schema.yml": sources_schema_yml, 47 | "source_model.sql": models_source_model_sql, 48 | "multi_source_model.sql": models_multi_source_model_sql, 49 | } 50 | 51 | @pytest.fixture(scope="class") 52 | def seeds_source_file(self): 53 | with open("/tmp/seeds_source_something.csv", "w") as f: 54 | f.write("id,a,b\n1,2,3\n4,5,6\n7,8,9") 55 | yield 56 | os.unlink("/tmp/seeds_source_something.csv") 57 | 58 | @pytest.fixture(scope="class") 59 | def ost_file(self): 60 | with 
open("/tmp/seeds_other_source_table.csv", "w") as f: 61 | f.write("id,c,d\n1,2,3\n4,5,6\n7,8,9") 62 | yield 63 | os.unlink("/tmp/seeds_other_source_table.csv") 64 | 65 | def test_external_sources(self, seeds_source_file, ost_file, project): 66 | results = run_dbt(["run"]) 67 | assert len(results) == 2 68 | test_results = run_dbt(["test"]) 69 | assert len(test_results) == 2 70 | -------------------------------------------------------------------------------- /tests/unit/test_external_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from tests.unit.utils import config_from_parts_or_dicts 8 | 9 | class TestExternalUtils(unittest.TestCase): 10 | def setUp(self): 11 | set_from_args(Namespace(STRICT_MODE=True), {}) 12 | 13 | profile_cfg = { 14 | "outputs": { 15 | "test": { 16 | "type": "duckdb", 17 | "path": ":memory:", 18 | } 19 | }, 20 | "target": "test", 21 | } 22 | 23 | project_cfg = { 24 | "name": "X", 25 | "version": "0.1", 26 | "profile": "test", 27 | "project-root": "/tmp/dbt/does-not-exist", 28 | "config-version": 2, 29 | } 30 | 31 | self.config = config_from_parts_or_dicts(project_cfg, profile_cfg, cli_vars={}) 32 | self._adapter = None 33 | 34 | @property 35 | def adapter(self): 36 | self.mock_mp_context = mock.MagicMock() 37 | if self._adapter is None: 38 | self._adapter = DuckDBAdapter(self.config, self.mock_mp_context) 39 | return self._adapter 40 | 41 | def test_external_write_options(self): 42 | data = [ 43 | ("/tmp/test.csv", {}, "format csv, header 1"), 44 | ("./foo.parquet", {"codec": "zstd"}, "codec zstd, format parquet"), 45 | ("bar", {"delimiter": "|", "header": "0"}, "delimiter '|', header 0, format csv"), 46 | ("a.parquet", {"partition_by": "ds"}, "partition_by ds, format parquet"), 47 | ("b.csv", {"partition_by": "ds,category"}, "partition_by (ds,category), format csv, header 1"), 48 | ("/path/to/c.csv", {"null": "\\N"}, "null '\\N', format csv, header 1") 49 | ] 50 | 51 | for (loc, opts, expected) in data: 52 | assert expected == self.adapter.external_write_options(loc, opts) 53 | 54 | 55 | def test_external_read_location(self): 56 | data = [ 57 | ("bar", {"format": "csv", "delimiter": "|", "header": "0"}, "bar"), 58 | ("/tmp/a", {"partition_by": "ds", "format": "parquet"}, "/tmp/a/*/*.parquet"), 59 | ("b", {"partition_by": "ds,category"}, "b/*/*/*.parquet"), 60 | ] 61 | for (loc, opts, expected) in data: 62 | assert expected == self.adapter.external_read_location(loc, opts) -------------------------------------------------------------------------------- /tests/functional/plugins/test_gsheet.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import ( 4 | check_relations_equal, 5 | run_dbt, 6 | ) 7 | 8 | sources_schema_yml = """ 9 | version: 2 10 | sources: 11 | - name: gsheet_source 12 | schema: main 13 | meta: 14 | plugin: gsheet 15 | title: "Josh's Test Spreadsheet" 16 | tables: 17 | - name: gsheet1 18 | description: "My first sheet" 19 | - name: gsheet2 20 | description: "The second sheet in the doc" 21 | meta: 22 | worksheet: "TwoSheet" 23 | """ 24 | 25 | models_source_model1_sql = """ 26 | select * from {{ source('gsheet_source', 'gsheet1') }} 27 | """ 28 | models_source_model2_sql = """ 29 | select * from {{ source('gsheet_source', 'gsheet2') }} 30 | """ 31 | 32 | 
33 | # Skipping this b/c it requires using my (@jwills) personal creds 34 | # when testing it locally and also b/c I think there is something 35 | # wrong with profiles_config_update since it can't be used in multiple 36 | # tests in the same pytest session 37 | @pytest.mark.skip 38 | class TestGSheetPlugin: 39 | @pytest.fixture(scope="class") 40 | def profiles_config_update(self, dbt_profile_target): 41 | config = {"method": "oauth"} 42 | if "path" not in dbt_profile_target: 43 | return {} 44 | return { 45 | "test": { 46 | "outputs": { 47 | "dev": { 48 | "type": "duckdb", 49 | "path": dbt_profile_target["path"], 50 | "plugins": [ 51 | {"module": "gsheet", "config": config} 52 | ], 53 | } 54 | }, 55 | "target": "dev", 56 | } 57 | } 58 | 59 | @pytest.fixture(scope="class") 60 | def models(self, test_data_path): 61 | return { 62 | "schema.yml": sources_schema_yml.format(test_data_path=test_data_path), 63 | "source_model1.sql": models_source_model1_sql, 64 | "source_model2.sql": models_source_model2_sql, 65 | } 66 | 67 | def test_gsheet_plugin(self, project): 68 | results = run_dbt() 69 | assert len(results) == 2 70 | 71 | check_relations_equal( 72 | project.adapter, 73 | [ 74 | "gsheet1", 75 | "source_model1", 76 | ], 77 | ) 78 | 79 | check_relations_equal( 80 | project.adapter, 81 | [ 82 | "gsheet2", 83 | "source_model2", 84 | ], 85 | ) 86 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/table_function.sql: -------------------------------------------------------------------------------- 1 | {% materialization table_function, adapter='duckdb' %} 2 | -- This materialization uses DuckDB's Table Function / Table Macro feature to provide parameterized views. 3 | -- Why use this? 4 | -- Late binding of functions means that the underlying table can change (have new columns added), and 5 | -- the function does not need to be recreated. (With a view, the create view statement would need to be re-run.) 6 | -- This allows for skipping parts of the dbt DAG, even if the underlying table changed. 7 | -- Parameters can force filter pushdown. 8 | -- Functions can provide advanced features like dynamic SQL (the query and query_table functions). 9 | 10 | -- For usage examples, see the tests at /dbt-duckdb/tests/functional/adapter/test_table_function.py 11 | -- (Don't forget parentheses when you pull from a table_function!) 12 | 13 | -- Using Redshift as an example: 14 | -- https://github.com/dbt-labs/dbt-adapters/blob/main/dbt-redshift/src/dbt/include/redshift/macros/materializations/table.sql 15 | {%- set identifier = model['alias'] -%} 16 | {%- set target_relation = api.Relation.create( 17 | identifier=identifier, 18 | schema=schema, 19 | database=database, 20 | type='view') -%} 21 | {%- set backup_relation = none -%} 22 | 23 | -- The parameters config is used to pass in the names of the parameters that will be used within the table function. 24 | -- parameters can be a single string value (with or without commas), or a list of strings. 25 | {%- set parameters=config.get('parameters') -%} 26 | 27 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 28 | 29 | -- `BEGIN` happens here: 30 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 31 | 32 | -- Create or replace the function (macro) 33 | -- By using create or replace (and a transaction), we do not need separate old and new versions of the relation.
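    -- A rough usage sketch (hypothetical model and column names, not part of this macro): a model configured with
    --     config(materialized='table_function', parameters=['start_ds', 'end_ds'])
    --     select * from events where ds between start_ds and end_ds
    -- compiles into a table macro, and downstream queries call it with parentheses, e.g.
    --     select * from my_model('2024-01-01', '2024-02-01')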
34 | {% call statement('main') -%} 35 | create or replace function {{ target_relation.render() }}( 36 | {% if not parameters %} 37 | {% elif parameters is string or parameters is number %} 38 | {{ parameters if parameters }} 39 | {% else %} 40 | {{ parameters|join(', ') }} 41 | {% endif %} 42 | ) as table ( 43 | {{ sql }}); 44 | {%- endcall %} 45 | 46 | {{ run_hooks(post_hooks, inside_transaction=True) }} 47 | 48 | {% do persist_docs(target_relation, model) %} 49 | 50 | -- `COMMIT` happens here: 51 | {{ adapter.commit() }} 52 | 53 | {{ run_hooks(post_hooks, inside_transaction=False) }} 54 | 55 | {{ return({'relations': [target_relation]}) }} 56 | 57 | {% endmaterialization %} 58 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/gsheet.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import Literal 5 | 6 | import gspread 7 | import pandas as pd 8 | 9 | from . import BasePlugin 10 | from . import PluginConfig 11 | from ..utils import SourceConfig 12 | 13 | 14 | @dataclass 15 | class GSheetConfig(PluginConfig): 16 | method: Literal["service", "oauth"] 17 | 18 | def client(self): 19 | if self.method == "service": 20 | return gspread.service_account() 21 | else: 22 | return gspread.oauth() 23 | 24 | 25 | class Plugin(BasePlugin): 26 | def initialize(self, config: Dict[str, Any]): 27 | self._config = GSheetConfig.from_dict(config) 28 | self._gc = self._config.client() 29 | 30 | def load(self, source_config: SourceConfig): 31 | doc = None 32 | if "title" in source_config: 33 | doc = self._gc.open(source_config["title"]) 34 | elif "key" in source_config: 35 | doc = self._gc.open_by_key(source_config["key"]) 36 | elif "url" in source_config: 37 | doc = self._gc.open_by_url(source_config["url"]) 38 | else: 39 | raise Exception("Source config did not indicate a method to open a GSheet to read") 40 | 41 | sheet = None 42 | if "worksheet" in source_config: 43 | work_id = source_config["worksheet"] 44 | if isinstance(work_id, int): 45 | sheet = doc.get_worksheet(work_id) 46 | elif isinstance(work_id, str): 47 | sheet = doc.worksheet(work_id) 48 | else: 49 | raise Exception( 50 | f"Could not identify a worksheet in the doc from identifier: {work_id}" 51 | ) 52 | else: 53 | sheet = doc.sheet1 54 | 55 | if "range" in source_config: 56 | range = source_config["range"] 57 | df = pd.DataFrame(sheet.get(range)) 58 | if "headers" in source_config: 59 | headers = source_config["headers"] 60 | if len(headers) == len(df.columns): 61 | df.columns = headers 62 | return df 63 | else: 64 | raise Exception( 65 | f"Number of configured headers ({len(headers)}) does not match number of columns in fetched range ({len(df.columns)})." 66 | ) 67 | else: 68 | df = df.rename(columns=df.iloc[0]).drop(df.index[0]).reset_index(drop=True)  # promote the first row to headers 69 | return df 70 | 71 | else: 72 | return pd.DataFrame(sheet.get_all_records()) 73 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Runs unit tests and functional tests using the latest nightly DuckDB build. 3 | # Any tests that use community extensions are skipped because these are not released nightly. 4 | 5 | # **why?** 6 | # Ensure dbt-duckdb is compatible with the bleeding-edge version of DuckDB.
7 | 8 | # **when?** 9 | # This will run nightly, after DuckDB releases its nightly build. 10 | 11 | name: Tests and Code Checks (DuckDB nightly) 12 | 13 | on: 14 | schedule: 15 | - cron: '0 0 * * *' # every 24 hours, top of the hour 16 | workflow_dispatch: 17 | 18 | permissions: read-all 19 | 20 | defaults: 21 | run: 22 | shell: bash 23 | 24 | jobs: 25 | nightly: 26 | name: nightly test / python ${{ matrix.python-version }} 27 | 28 | runs-on: ubuntu-latest 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | python-version: ['3.10', '3.11', '3.12', '3.13'] 34 | 35 | env: 36 | TOXENV: "nightly" 37 | PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv" 38 | S3_MD_ORG_KEY: ${{ secrets.S3_MD_ORG_KEY }} 39 | S3_MD_ORG_REGION: ${{ secrets.S3_MD_ORG_REGION }} 40 | S3_MD_ORG_SECRET: ${{ secrets.S3_MD_ORG_SECRET }} 41 | 42 | steps: 43 | - name: Check out the repository 44 | uses: actions/checkout@v6 45 | with: 46 | persist-credentials: false 47 | 48 | - name: Set up Python ${{ matrix.python-version }} 49 | uses: actions/setup-python@v6 50 | with: 51 | python-version: ${{ matrix.python-version }} 52 | 53 | - name: Install python dependencies 54 | run: | 55 | python -m pip install tox 56 | python -m pip --version 57 | tox --version 58 | 59 | - name: Run tox 60 | run: tox 61 | 62 | - name: Get current date 63 | if: always() 64 | id: date 65 | run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts 66 | 67 | - uses: actions/upload-artifact@v6 68 | if: always() 69 | with: 70 | name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv 71 | path: unit_results.csv 72 | 73 | notify-failure: 74 | name: Send Slack notification on failure 75 | if: failure() 76 | needs: [nightly] 77 | runs-on: ubuntu-latest 78 | steps: 79 | - name: Send Slack notification 80 | uses: slackapi/slack-github-action@v2.1.1 81 | with: 82 | webhook: ${{ secrets.MOTHERDUCK_CI_NOTIFICATION_WEBHOOK }} 83 | webhook-type: webhook-trigger 84 | payload: | 85 | { 86 | "text": "dbt-duckdb nightly workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" 87 | } 88 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_ducklake.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from argparse import Namespace 3 | from unittest import mock 4 | 5 | from dbt.flags import set_from_args 6 | from dbt.adapters.duckdb import DuckDBAdapter 7 | from dbt.adapters.duckdb.relation import DuckDBRelation 8 | from tests.unit.utils import config_from_parts_or_dicts 9 | 10 | 11 | class TestMotherduckDucklakeDetection(unittest.TestCase): 12 | def setUp(self): 13 | set_from_args(Namespace(STRICT_MODE=True), {}) 14 | 15 | # Use a MotherDuck path to align with plugin context, but we won't actually connect 16 | self.base_profile_cfg = { 17 | "outputs": { 18 | "test": { 19 | "type": "duckdb", 20 | "path": "md:my_db", 21 | } 22 | }, 23 | "target": "test", 24 | } 25 | 26 | project_cfg = { 27 | "name": "X", 28 | "version": "0.1", 29 | "profile": "test", 30 | "project-root": "/tmp/dbt/does-not-exist", 31 | "quoting": { 32 | "identifier": False, 33 | "schema": True, 34 | }, 35 | "config-version": 2, 36 | } 37 | 38 | self.project_cfg = project_cfg 39 | self.mock_mp_context = mock.MagicMock() 40 | 41 | def _get_adapter(self, profile_cfg): 42 | config = config_from_parts_or_dicts(self.project_cfg, profile_cfg, cli_vars={}) 
43 | return DuckDBAdapter(config, self.mock_mp_context) 44 | 45 | 46 | def test_is_ducklake_primary_database(self): 47 | profile_cfg = self.base_profile_cfg.copy() 48 | profile_cfg["outputs"]["test"]["is_ducklake"] = True 49 | 50 | adapter = self._get_adapter(profile_cfg) 51 | relation = DuckDBRelation.create(database="my_db", schema="main", identifier="t2") 52 | 53 | assert adapter.is_ducklake(relation) is True 54 | 55 | 56 | def test_is_not_ducklake(self): 57 | profile_cfg = self.base_profile_cfg.copy() 58 | adapter = self._get_adapter(profile_cfg) 59 | relation = DuckDBRelation.create(database="my_db", schema="main", identifier="t2") 60 | assert adapter.is_ducklake(relation) is False 61 | 62 | 63 | def test_is_ducklake_in_attachment(self): 64 | profile_cfg = self.base_profile_cfg.copy() 65 | profile_cfg["outputs"]["test"]["attach"] = [ 66 | { 67 | "path": "md:some_db", 68 | "type": "duckdb", 69 | "is_ducklake": True 70 | } 71 | ] 72 | 73 | adapter = self._get_adapter(profile_cfg) 74 | relation = DuckDBRelation.create(database="some_db", schema="main", identifier="t") 75 | 76 | assert adapter.is_ducklake(relation) is True 77 | 78 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import List 5 | from typing import Optional 6 | from typing import Sequence 7 | 8 | from dbt.adapters.base.column import Column 9 | from dbt.adapters.base.relation import BaseRelation 10 | from dbt.adapters.contracts.relation import RelationConfig 11 | # TODO 12 | # from dbt.context.providers import RuntimeConfigObject 13 | 14 | 15 | @dataclass 16 | class SourceConfig: 17 | name: str 18 | identifier: str 19 | schema: str 20 | database: Optional[str] 21 | meta: Dict[str, Any] 22 | tags: List[str] 23 | 24 | def get(self, key, default=None): 25 | return self.meta.get(key, default) 26 | 27 | def __getitem__(self, key): 28 | return self.meta[key] 29 | 30 | def __contains__(self, key): 31 | return key in self.meta 32 | 33 | def table_name(self) -> str: 34 | if self.database: 35 | return ".".join([self.database, self.schema, self.identifier]) 36 | else: 37 | return ".".join([self.schema, self.identifier]) 38 | 39 | def as_dict(self) -> Dict[str, Any]: 40 | base = { 41 | "name": self.name, 42 | "identifier": self.identifier, 43 | "schema": self.schema, 44 | "database": self.database, 45 | "tags": self.tags, 46 | } 47 | base.update(self.meta) 48 | return base 49 | 50 | @classmethod 51 | def create_from_source(cls, source: RelationConfig) -> "SourceConfig": 52 | meta = source.meta.copy() 53 | # Use the config properties as well if they are present 54 | config_properties = source.config.extra if source.config else {} 55 | meta.update(config_properties) 56 | return SourceConfig( 57 | name=source.name, 58 | identifier=source.identifier, 59 | schema=source.schema, 60 | database=source.database, 61 | meta=meta, 62 | tags=source.tags or [], 63 | ) 64 | 65 | 66 | @dataclass 67 | class TargetLocation: 68 | path: str 69 | format: str 70 | 71 | def as_dict(self) -> Dict[str, Any]: 72 | return {"path": self.path, "format": self.format} 73 | 74 | 75 | @dataclass 76 | class TargetConfig: 77 | relation: BaseRelation 78 | column_list: Sequence[Column] 79 | config: Any # TODO 80 | location: Optional[TargetLocation] = None 81 | 82 | def as_dict(self) -> Dict[str, Any]: 83 | base = { 84 | 
"relation": self.relation.to_dict(), 85 | "column_list": [{"column": c.column, "dtype": c.dtype} for c in self.column_list], 86 | "config": self.config, 87 | } 88 | if self.location: 89 | base["location"] = self.location.as_dict() 90 | return base 91 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_rematerialize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.util import run_dbt, relation_from_name 4 | from dbt.adapters.duckdb import DuckDBConnectionManager 5 | 6 | upstream_model_sql = """ 7 | select range from range(3) 8 | """ 9 | 10 | upstream_partition_by_model = """ 11 | {{ config(materialized='external', options={"partition_by": "a"}) }} 12 | select range as a, 'foo' as b from range(5) 13 | """ 14 | 15 | downstream_model_sql = """ 16 | select range * 2 from {{ ref('upstream_model') }} 17 | """ 18 | 19 | other_downstream_model_sql = """ 20 | select range * 5 from {{ ref('upstream_model') }} 21 | """ 22 | 23 | downstream_of_partition_model = """ 24 | select a from {{ ref('upstream_partition_by_model') }} 25 | """ 26 | 27 | 28 | # class must begin with 'Test' 29 | class TestRematerializeDownstreamExternalModel: 30 | """ 31 | External models should load in dependencies when they exist. 32 | 33 | We test that after materializing upstream and downstream models, we can 34 | materialize the downstream model by itself, even if we are using an 35 | in-memory database. 36 | """ 37 | 38 | @pytest.fixture(scope="class") 39 | def dbt_profile_target(self, dbt_profile_target, tmp_path_factory): 40 | extroot = str(tmp_path_factory.getbasetemp() / "rematerialize") 41 | os.mkdir(extroot) 42 | dbt_profile_target["external_root"] = extroot 43 | return dbt_profile_target 44 | 45 | @pytest.fixture(scope="class") 46 | def project_config_update(self): 47 | return { 48 | "name": "base", 49 | "models": {"+materialized": "external"}, 50 | "on-run-start": ["{{ register_upstream_external_models() }}"], 51 | } 52 | 53 | @pytest.fixture(scope="class") 54 | def models(self): 55 | return { 56 | "upstream_model.sql": upstream_model_sql, 57 | "upstream_partition_by_model.sql": upstream_partition_by_model, 58 | "downstream_model.sql": downstream_model_sql, 59 | "other_downstream_model.sql": other_downstream_model_sql, 60 | "downstream_of_partition_model.sql": downstream_of_partition_model, 61 | } 62 | 63 | def test_run(self, project): 64 | run_dbt(["run"]) 65 | 66 | # Force close the :memory: connection 67 | DuckDBConnectionManager.close_all_connections() 68 | run_dbt( 69 | [ 70 | "run", 71 | "--select", 72 | "downstream_model other_downstream_model downstream_of_partition_model", 73 | ] 74 | ) 75 | 76 | # really makes sure we have created the downstream model 77 | relation = relation_from_name(project.adapter, "downstream_of_partition_model") 78 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 79 | assert result[0] == 5 80 | -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/table.sql: -------------------------------------------------------------------------------- 1 | {% materialization table, adapter="duckdb", supported_languages=['sql', 'python'] %} 2 | 3 | {%- set language = model['language'] -%} 4 | 5 | {%- set existing_relation = load_cached_relation(this) -%} 6 | {%- set target_relation = this.incorporate(type='table') %} 7 | {%- set 
intermediate_relation = make_intermediate_relation(target_relation) -%} 8 | -- the intermediate_relation should not already exist in the database; get_relation 9 | -- will return None in that case. Otherwise, we get a relation that we can drop 10 | -- later, before we try to use this name for the current operation 11 | {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%} 12 | /* 13 | See ../view/view.sql for more information about this relation. 14 | */ 15 | {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%} 16 | {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%} 17 | -- as above, the backup_relation should not already exist 18 | {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} 19 | -- grab the current table's grants config for comparison later on 20 | {% set grant_config = config.get('grants') %} 21 | 22 | -- drop the temp relations if they exist already in the database 23 | {{ drop_relation_if_exists(preexisting_intermediate_relation) }} 24 | {{ drop_relation_if_exists(preexisting_backup_relation) }} 25 | 26 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 27 | 28 | -- `BEGIN` happens here: 29 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 30 | 31 | -- build model 32 | {% call statement('main', language=language) -%} 33 | {{- create_table_as(False, intermediate_relation, compiled_code, language) }} 34 | {%- endcall %} 35 | 36 | -- cleanup 37 | {% if existing_relation is not none %} 38 | {#-- Drop indexes before renaming to avoid dependency errors --#} 39 | {% do drop_indexes_on_relation(existing_relation) %} 40 | {{ adapter.rename_relation(existing_relation, backup_relation) }} 41 | {% endif %} 42 | 43 | {{ adapter.rename_relation(intermediate_relation, target_relation) }} 44 | 45 | {% do create_indexes(target_relation) %} 46 | 47 | {{ run_hooks(post_hooks, inside_transaction=True) }} 48 | 49 | {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} 50 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 51 | 52 | {% do persist_docs(target_relation, model) %} 53 | 54 | -- `COMMIT` happens here 55 | {{ adapter.commit() }} 56 | 57 | -- finally, drop the existing/backup relation after the commit 58 | {{ drop_relation_if_exists(backup_relation) }} 59 | 60 | {{ run_hooks(post_hooks, inside_transaction=False) }} 61 | 62 | {{ return({'relations': [target_relation]}) }} 63 | {% endmaterialization %} 64 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_attach.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import duckdb 5 | import pytest 6 | 7 | from dbt.adapters.duckdb import DuckDBConnectionManager 8 | from dbt.tests.util import run_dbt 9 | 10 | sources_schema_yml = """ 11 | version: 2 12 | sources: 13 | - name: attached_source 14 | database: attach_test 15 | schema: analytics 16 | tables: 17 | - name: attached_table 18 | description: "An attached table" 19 | columns: 20 | - name: id 21 | description: "An id" 22 | tests: 23 | - unique 24 | - not_null 25 | """ 26 | 27 | models_source_model_sql = """ 28 | select * from {{ source('attached_source', 'attached_table') }} 29 | """ 30 | 31 | models_target_model_sql = """ 32 | {{ config(materialized='table', database='attach_test') }} 33 | SELECT * FROM {{ ref('source_model') }} 34 | """ 35
| 36 | 37 | @pytest.mark.skip_profile("memory", "buenavista", "md") 38 | class TestAttachedDatabase: 39 | @pytest.fixture(scope="class") 40 | def attach_test_db(self): 41 | with tempfile.TemporaryDirectory() as temp_dir: 42 | path = os.path.join(temp_dir, "attach_test.duckdb") 43 | db = duckdb.connect(path) 44 | db.execute("CREATE SCHEMA analytics") 45 | db.execute("CREATE TABLE analytics.attached_table AS SELECT 1 as id") 46 | db.close() 47 | yield path 48 | 49 | @pytest.fixture(scope="class") 50 | def profiles_config_update(self, dbt_profile_target, attach_test_db): 51 | return { 52 | "test": { 53 | "outputs": { 54 | "dev": { 55 | "type": "duckdb", 56 | "path": dbt_profile_target.get("path", ":memory:"), 57 | "attach": [{"path": attach_test_db}], 58 | } 59 | }, 60 | "target": "dev", 61 | } 62 | } 63 | 64 | @pytest.fixture(scope="class") 65 | def models(self): 66 | return { 67 | "schema.yml": sources_schema_yml, 68 | "source_model.sql": models_source_model_sql, 69 | "target_model.sql": models_target_model_sql, 70 | } 71 | 72 | def test_attached_databases(self, project, attach_test_db): 73 | results = run_dbt() 74 | assert len(results) == 2 75 | 76 | test_results = run_dbt(["test"]) 77 | assert len(test_results) == 2 78 | 79 | DuckDBConnectionManager.close_all_connections() 80 | 81 | # check that the model is created in the attached db 82 | db = duckdb.connect(attach_test_db) 83 | ret = db.execute("SELECT * FROM target_model").fetchall() 84 | assert ret[0][0] == 1 85 | db.close() 86 | 87 | # check that everything works on a re-run of dbt 88 | rerun_results = run_dbt() 89 | assert len(rerun_results) == 2 90 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/environments/buenavista.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import psycopg2 4 | 5 | from . import Environment 6 | from .. import credentials 7 | from .. 
import utils 8 | from dbt.adapters.contracts.connection import AdapterResponse 9 | from dbt.adapters.contracts.connection import Connection 10 | 11 | 12 | class BVEnvironment(Environment): 13 | @classmethod 14 | def _get_conn(cls, dbname: str, remote: credentials.Remote): 15 | return psycopg2.connect( 16 | dbname=dbname, 17 | user=remote.user, 18 | host=remote.host, 19 | port=remote.port, 20 | password=remote.password, 21 | ) 22 | 23 | def __init__(self, credentials: credentials.DuckDBCredentials): 24 | super().__init__(credentials) 25 | if not self.creds.remote: 26 | raise Exception("BVConnection only works with a remote host") 27 | 28 | def handle(self): 29 | # Extensions/settings need to be configured per cursor 30 | conn = self._get_conn(self.creds.database, self.creds.remote) 31 | cursor = self.initialize_cursor(self.creds, conn.cursor()) 32 | cursor.close() 33 | return conn 34 | 35 | def is_cancelable(cls): 36 | return False 37 | 38 | @classmethod 39 | def cancel(cls, connection: Connection): 40 | pass 41 | 42 | def get_binding_char(self) -> str: 43 | return "%s" 44 | 45 | def submit_python_job(self, handle, parsed_model: dict, compiled_code: str) -> AdapterResponse: 46 | identifier = parsed_model["alias"] 47 | payload = { 48 | "method": "dbt_python_job", 49 | "params": { 50 | "module_name": identifier, 51 | "module_definition": compiled_code, 52 | }, 53 | } 54 | # TODO: handle errors here 55 | handle.cursor().execute(json.dumps(payload)) 56 | return AdapterResponse(_message="OK") 57 | 58 | def load_source(self, plugin_name: str, source_config: utils.SourceConfig): 59 | handle = self.handle() 60 | payload = { 61 | "method": "dbt_load_source", 62 | "params": { 63 | "plugin_name": plugin_name, 64 | "source_config": source_config.as_dict(), 65 | }, 66 | } 67 | cursor = handle.cursor() 68 | cursor.execute(json.dumps(payload)) 69 | cursor.close() 70 | handle.close() 71 | 72 | def store_relation(self, plugin_name: str, target_config: utils.TargetConfig) -> None: 73 | handle = self.handle() 74 | payload = { 75 | "method": "dbt_store_relation", 76 | "params": { 77 | "plugin_name": plugin_name, 78 | "target_config": target_config.as_dict(), 79 | }, 80 | } 81 | cursor = handle.cursor() 82 | cursor.execute(json.dumps(payload)) 83 | cursor.close() 84 | handle.close() 85 | -------------------------------------------------------------------------------- /tests/unit/test_column.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.adapters.duckdb.column import DuckDBColumn 4 | 5 | # Test cases for is_float method 6 | @pytest.mark.parametrize("dtype, expected", [ 7 | ("real", True), 8 | ("float", True), 9 | ("float4", True), 10 | ("float8", True), 11 | ("double", True), 12 | ("integer", False), 13 | ("string", False), 14 | ("bigint", False) 15 | ]) 16 | def test_is_float(dtype, expected): 17 | column = DuckDBColumn(column="float_test", dtype=dtype) 18 | assert column.is_float() == expected 19 | 20 | # Test cases for is_integer method 21 | @pytest.mark.parametrize("dtype, expected", [ 22 | ("tinyint", True), 23 | ("smallint", True), 24 | ("integer", True), 25 | ("bigint", True), 26 | ("hugeint", True), 27 | ("utinyint", True), 28 | ("usmallint", True), 29 | ("uinteger", True), 30 | ("ubigint", True), 31 | ("int1", True), 32 | ("int2", True), 33 | ("int4", True), 34 | ("int8", True), 35 | ("short", True), 36 | ("int", True), 37 | ("signed", True), 38 | ("long", True), 39 | ("float", False), 40 | ("string", False), 41 | ("double", False) 
42 | ]) 43 | def test_is_integer(dtype, expected): 44 | column = DuckDBColumn(column="integer_test", dtype=dtype) 45 | assert column.is_integer() == expected 46 | 47 | # Test cases for is_struct method 48 | @pytest.mark.parametrize("dtype, expected", [ 49 | ("struct(a integer, b varchar)", True), 50 | ("struct(a integer)", True), 51 | ("STRUCT(a integer, b varchar)", True), 52 | ("integer", False), 53 | ("varchar", False), 54 | ]) 55 | def test_is_struct(dtype, expected): 56 | column = DuckDBColumn(column="struct_test", dtype=dtype) 57 | assert column.is_struct() == expected 58 | 59 | # Test cases for flatten method 60 | def test_flatten_simple_struct(): 61 | column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b varchar)") 62 | flattened = column.flatten() 63 | assert len(flattened) == 2 64 | assert flattened[0].column == "struct_test.a" 65 | assert flattened[0].dtype == "integer" 66 | assert flattened[1].column == "struct_test.b" 67 | assert flattened[1].dtype == "varchar" 68 | 69 | def test_flatten_nested_struct(): 70 | column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b struct(c integer, d varchar))") 71 | flattened = column.flatten() 72 | assert len(flattened) == 3 73 | assert flattened[0].column == "struct_test.a" 74 | assert flattened[0].dtype == "integer" 75 | assert flattened[1].column == "struct_test.b.c" 76 | assert flattened[1].dtype == "integer" 77 | assert flattened[2].column == "struct_test.b.d" 78 | assert flattened[2].dtype == "varchar" 79 | 80 | def test_flatten_non_struct(): 81 | column = DuckDBColumn(column="integer_test", dtype="integer") 82 | flattened = column.flatten() 83 | assert len(flattened) == 1 84 | assert flattened[0].column == "integer_test" 85 | assert flattened[0].dtype == "integer" -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_write_conflict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | from dbt.exceptions import DbtRuntimeError 4 | 5 | 6 | incremental_model_1_sql = """ 7 | {{ config(materialized='incremental') }} 8 | 9 | select 10 | generate_series as id, 11 | 'model_1_data_' || generate_series::varchar as data, 12 | current_timestamp as created_at 13 | from generate_series(1, 100) 14 | 15 | {% if is_incremental() %} 16 | where generate_series > (select coalesce(max(id), 0) from {{ this }}) 17 | {% endif %} 18 | """ 19 | 20 | incremental_model_2_sql = """ 21 | {{ config(materialized='incremental') }} 22 | 23 | select 24 | generate_series as id, 25 | 'model_2_data_' || generate_series::varchar as data, 26 | current_timestamp as created_at 27 | from generate_series(1, 50) 28 | 29 | {% if is_incremental() %} 30 | where generate_series > (select coalesce(max(id), 0) from {{ this }}) 31 | {% endif %} 32 | """ 33 | 34 | 35 | @pytest.mark.skip_profile("buenavista", "file", "memory") 36 | class TestMDWriteConflict: 37 | """Test to reproduce the write-write conflict with multiple models trying to create the dbt_temp schema concurrently.""" 38 | 39 | @pytest.fixture(scope="class") 40 | def profiles_config_update(self, dbt_profile_target): 41 | """Configure with 2 threads to trigger write conflict.""" 42 | return { 43 | "test": { 44 | "outputs": { 45 | "dev": { 46 | "type": "duckdb", 47 | "path": "test_write_conflict.duckdb", 48 | "attach": [ 49 | { 50 | "path": "md:", 51 | } # Attach MotherDuck 52 | ], 53 | "threads": 2, # Enable threading to 
trigger conflict 54 | } 55 | }, 56 | "target": "dev", 57 | } 58 | } 59 | 60 | @pytest.fixture(scope="class") 61 | def models(self): 62 | return { 63 | "incremental_model_1.sql": incremental_model_1_sql, 64 | "incremental_model_2.sql": incremental_model_2_sql, 65 | } 66 | 67 | def test_write_conflict_on_second_run(self, project): 68 | """ 69 | Test that reproduces the write-write conflict: 70 | 1. First run always succeeds (initializes both incremental models) 71 | 2. Second run, which is the first true incremental run, should succeed, 72 | while it previously failed with a write-write conflict due to 73 | both models trying to create the dbt_temp schema simultaneously. 74 | """ 75 | results = run_dbt(expect_pass=True) 76 | 77 | res1 = project.run_sql("SELECT count(*) FROM incremental_model_1", fetch="one") 78 | assert res1[0] == 100 79 | 80 | res2 = project.run_sql("SELECT count(*) FROM incremental_model_2", fetch="one") 81 | assert res2[0] == 50 82 | 83 | run_dbt(expect_pass=True) 84 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | "build": { 4 | "dockerfile": "Dockerfile", 5 | "context": "..", 6 | "args": { 7 | // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 8 | // Append -bullseye or -buster to pin to an OS version. 9 | // Use -bullseye variants on local on arm64/Apple Silicon. 10 | "VARIANT": "3.11", 11 | // Options 12 | "NODE_VERSION": "none" 13 | } 14 | }, 15 | // Configure tool-specific properties. 16 | "customizations": { 17 | // Configure properties specific to VS Code. 18 | "vscode": { 19 | // Set *default* container specific settings.json values on container create. 20 | "settings": { 21 | "python.defaultInterpreterPath": "/usr/local/bin/python", 22 | "python.testing.pytestEnabled": true, 23 | "python.testing.unittestEnabled": false, 24 | "python.linting.enabled": true, 25 | "python.linting.flake8Enabled": true, 26 | "python.linting.mypyEnabled": true, 27 | "python.linting.pylintEnabled": false, 28 | "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", 29 | "python.formatting.provider": "black", 30 | "python.formatting.blackPath": "/usr/local/py-utils/bin/black", 31 | "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", 32 | "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", 33 | "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", 34 | "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", 35 | "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", 36 | "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", 37 | "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint", 38 | "[python]": { 39 | "editor.defaultFormatter": "ms-python.python", 40 | "editor.formatOnSave": true, 41 | "editor.tabSize": 4, 42 | "editor.codeActionsOnSave": { 43 | "source.organizeImports": true 44 | } 45 | } 46 | }, 47 | // Add the IDs of extensions you want installed when the container is created. 
48 | "extensions": [ 49 | "ms-python.python", 50 | "ms-python.vscode-pylance" 51 | ] 52 | } 53 | }, 54 | // "features": { 55 | // // Allow the devcontainer to run host docker commands, see https://github.com/devcontainers/templates/tree/main/src/docker-outside-of-docker 56 | // "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { 57 | // "enableNonRootDocker": true 58 | // } 59 | // }, 60 | // "mounts": [ 61 | // "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" 62 | // ], 63 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 64 | // "forwardPorts": [], 65 | // Use 'postCreateCommand' to run commands after the container is created. 66 | // "postCreateCommand": "pip3 install --user -r requirements.txt", 67 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 68 | "remoteUser": "vscode", 69 | "workspaceFolder": "/workspaces/dbt-duckdb", 70 | "postCreateCommand": "pip install -e . && pip install -r dev-requirements.txt" 71 | } 72 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_basic.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations 4 | from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests 5 | from dbt.tests.adapter.basic.test_singular_tests_ephemeral import ( 6 | BaseSingularTestsEphemeral, 7 | ) 8 | from dbt.tests.adapter.basic.test_empty import BaseEmpty 9 | from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral 10 | from dbt.tests.adapter.basic.test_incremental import BaseIncremental 11 | from dbt.tests.adapter.basic.test_incremental import BaseIncrementalNotSchemaChange 12 | from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests 13 | from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols 14 | from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp 15 | from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod 16 | from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection 17 | from dbt.tests.adapter.basic.test_docs_generate import ( 18 | BaseDocsGenerate, 19 | BaseDocsGenReferences, 20 | ) 21 | from dbt.tests.adapter.basic.expected_catalog import ( 22 | base_expected_catalog, 23 | no_stats, 24 | expected_references_catalog, 25 | ) 26 | 27 | 28 | class TestSimpleMaterializationsDuckDB(BaseSimpleMaterializations): 29 | pass 30 | 31 | 32 | class TestSingularTestsDuckDB(BaseSingularTests): 33 | pass 34 | 35 | 36 | class TestSingularTestsEphemeralDuckDB(BaseSingularTestsEphemeral): 37 | pass 38 | 39 | 40 | class TestEmptyDuckDB(BaseEmpty): 41 | pass 42 | 43 | 44 | class TestEphemeralDuckDB(BaseEphemeral): 45 | pass 46 | 47 | 48 | class TestIncrementalDuckDB(BaseIncremental): 49 | pass 50 | 51 | class TestBaseIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): 52 | pass 53 | 54 | 55 | class TestGenericTestsDuckDB(BaseGenericTests): 56 | pass 57 | 58 | 59 | class TestSnapshotCheckColsDuckDB(BaseSnapshotCheckCols): 60 | pass 61 | 62 | 63 | class TestSnapshotTimestampDuckDB(BaseSnapshotTimestamp): 64 | pass 65 | 66 | 67 | class TestBaseAdapterMethodDuckDB(BaseAdapterMethod): 68 | pass 69 | 70 | 71 | class TestValidateConnectionDuckDB(BaseValidateConnection): 72 | pass 73 | 74 | 75 | class TestDocsGenerateDuckDB(BaseDocsGenerate): 76 | 
@pytest.fixture(scope="class") 77 | def expected_catalog(self, project): 78 | return base_expected_catalog( 79 | project, 80 | role=None, 81 | id_type="INTEGER", 82 | text_type="VARCHAR", 83 | time_type="TIMESTAMP", 84 | view_type="VIEW", 85 | table_type="BASE TABLE", 86 | model_stats=no_stats(), 87 | ) 88 | 89 | 90 | class TestDocsGenReferencesDuckDB(BaseDocsGenReferences): 91 | @pytest.fixture(scope="class") 92 | def expected_catalog(self, project): 93 | return expected_references_catalog( 94 | project, 95 | role=None, 96 | id_type="INTEGER", 97 | text_type="VARCHAR", 98 | time_type="TIMESTAMP", 99 | view_type="VIEW", 100 | table_type="BASE TABLE", 101 | model_stats=no_stats(), 102 | bigint_type="BIGINT", 103 | ) 104 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_constraints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.constraints.test_constraints import ( 4 | BaseTableConstraintsColumnsEqual, 5 | BaseViewConstraintsColumnsEqual, 6 | BaseIncrementalConstraintsColumnsEqual, 7 | BaseConstraintsRuntimeDdlEnforcement, 8 | BaseConstraintsRollback, 9 | BaseIncrementalConstraintsRuntimeDdlEnforcement, 10 | BaseIncrementalConstraintsRollback, 11 | BaseModelConstraintsRuntimeEnforcement, 12 | ) 13 | 14 | 15 | class DuckDBColumnEqualSetup: 16 | @pytest.fixture 17 | def int_type(self): 18 | return "INT" 19 | 20 | @pytest.fixture 21 | def string_type(self): 22 | return "VARCHAR" 23 | 24 | @pytest.fixture 25 | def data_types(self, schema_int_type, int_type, string_type): 26 | # sql_column_value, schema_data_type, error_data_type 27 | return [ 28 | ["1", schema_int_type, int_type], 29 | ["'1'", string_type, string_type], 30 | ["true", "bool", "BOOL"], 31 | ["'2013-11-03 00:00:00-07'::timestamp", "TIMESTAMP", "TIMESTAMP"], 32 | ["'2013-11-03 00:00:00-07'::timestamptz", "TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE"], 33 | ["ARRAY['a','b','c']", "VARCHAR[]", "VARCHAR[]"], 34 | ["ARRAY[1,2,3]", "INTEGER[]", "INTEGER[]"], 35 | ["'1'::numeric", "numeric", "DECIMAL"], 36 | [ 37 | """'{"bar": "baz", "balance": 7.77, "active": false}'::json""", 38 | "json", 39 | "JSON", 40 | ], 41 | ] 42 | 43 | 44 | class TestTableConstraintsColumnsEqual( 45 | DuckDBColumnEqualSetup, BaseTableConstraintsColumnsEqual 46 | ): 47 | pass 48 | 49 | 50 | class TestViewConstraintsColumnsEqual( 51 | DuckDBColumnEqualSetup, BaseViewConstraintsColumnsEqual 52 | ): 53 | pass 54 | 55 | 56 | class TestIncrementalConstraintsColumnsEqual( 57 | DuckDBColumnEqualSetup, BaseIncrementalConstraintsColumnsEqual 58 | ): 59 | pass 60 | 61 | 62 | @pytest.mark.skip_profile("md") 63 | class TestTableConstraintsRuntimeDdlEnforcement( 64 | DuckDBColumnEqualSetup, BaseConstraintsRuntimeDdlEnforcement 65 | ): 66 | pass 67 | 68 | 69 | @pytest.mark.skip_profile("md", "buenavista") 70 | class TestTableConstraintsRollback(DuckDBColumnEqualSetup, BaseConstraintsRollback): 71 | @pytest.fixture(scope="class") 72 | def expected_error_messages(self): 73 | return ["NOT NULL constraint failed"] 74 | 75 | 76 | @pytest.mark.skip_profile("md") 77 | class TestIncrementalConstraintsRuntimeDdlEnforcement( 78 | DuckDBColumnEqualSetup, BaseIncrementalConstraintsRuntimeDdlEnforcement 79 | ): 80 | @pytest.fixture(scope="class") 81 | def expected_error_messages(self): 82 | return ["NOT NULL constraint failed"] 83 | 84 | 85 | @pytest.mark.skip_profile("md", "buenavista") 86 | class TestIncrementalConstraintsRollback( 87 | 
DuckDBColumnEqualSetup, BaseIncrementalConstraintsRollback 88 | ): 89 | @pytest.fixture(scope="class") 90 | def expected_error_messages(self): 91 | return ["NOT NULL constraint failed"] 92 | 93 | 94 | @pytest.mark.skip_profile("md") 95 | class TestModelConstraintsRuntimeEnforcement( 96 | DuckDBColumnEqualSetup, BaseModelConstraintsRuntimeEnforcement 97 | ): 98 | @pytest.fixture(scope="class") 99 | def expected_error_messages(self): 100 | return ["NOT NULL constraint failed"] 101 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/secrets.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any 3 | from typing import Dict 4 | from typing import List 5 | from typing import Optional 6 | from typing import Union 7 | 8 | from dbt_common.dataclass_schema import dbtClassMixin 9 | 10 | 11 | DEFAULT_SECRET_PREFIX = "_dbt_secret_" 12 | 13 | 14 | @dataclass 15 | class Secret(dbtClassMixin): 16 | type: str 17 | persistent: Optional[bool] = False 18 | name: Optional[str] = None 19 | provider: Optional[str] = None 20 | scope: Optional[Union[str, List[str]]] = None 21 | secret_kwargs: Optional[Dict[str, Any]] = None 22 | 23 | @classmethod 24 | def create( 25 | cls, 26 | secret_type: str, 27 | persistent: Optional[bool] = None, 28 | name: Optional[str] = None, 29 | provider: Optional[str] = None, 30 | scope: Optional[Union[str, List[str]]] = None, 31 | **kwargs, 32 | ): 33 | # Create and return Secret 34 | return cls( 35 | type=secret_type, 36 | persistent=persistent, 37 | name=name, 38 | provider=provider, 39 | scope=scope, 40 | secret_kwargs=kwargs, 41 | ) 42 | 43 | def _format_value(self, key: str, value: Any) -> str: 44 | """Format a value for DuckDB SQL based on its type and key.""" 45 | # Keys that should not be quoted 46 | unquoted_keys = ["type", "provider", "extra_http_headers"] 47 | 48 | if isinstance(value, dict): 49 | # Format as DuckDB map: map {'key1': 'value1', 'key2': 'value2'} 50 | items = [f"'{k}': '{v}'" for k, v in value.items()] 51 | return f"{key} map {{{', '.join(items)}}}" 52 | elif isinstance(value, list): 53 | # Format as DuckDB array: array ['item1', 'item2'] 54 | items = [f"'{item}'" for item in value] 55 | return f"{key} array [{', '.join(items)}]" 56 | elif key in unquoted_keys: 57 | return f"{key} {value}" 58 | else: 59 | return f"{key} '{value}'" 60 | 61 | def to_sql(self) -> str: 62 | name = f" {self.name}" if self.name else "" 63 | or_replace = " OR REPLACE" if name else "" 64 | persistent = " PERSISTENT" if self.persistent is True else "" 65 | tab = " " 66 | params = self.to_dict(omit_none=True) 67 | params.update(params.pop("secret_kwargs", {})) 68 | 69 | scope_value: Optional[List[str]] = None 70 | raw_scope = params.get("scope") 71 | if isinstance(raw_scope, str): 72 | scope_value = [raw_scope] 73 | elif isinstance(raw_scope, list): 74 | scope_value = raw_scope 75 | 76 | if scope_value is not None: 77 | params.pop("scope", None) 78 | params_sql: List[str] = [] 79 | for key, value in params.items(): 80 | if value is not None and key not in ["name", "persistent"]: 81 | params_sql.append(self._format_value(key, value)) 82 | for s in scope_value: 83 | params_sql.append(f"scope '{s}'") 84 | 85 | params_sql_str = f",\n{tab}".join(params_sql) 86 | else: 87 | params_sql_list = [ 88 | self._format_value(key, value) 89 | for key, value in params.items() 90 | if value is not None and key not in ["name", "persistent"] 91 | ] 92 | 
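        # Illustrative sketch (not part of the adapter), using hypothetical values:
        #   Secret.create("s3", name="my_secret", key_id="abc", secret="xyz").to_sql()
        # would render roughly as
        #   CREATE OR REPLACE SECRET my_secret (
        #       type s3,
        #       key_id 'abc',
        #       secret 'xyz'
        #   )
        # i.e. `type` stays unquoted while the extra keyword arguments are single-quoted.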
params_sql_str = f",\n{tab}".join(params_sql_list) 93 | 94 | sql = f"""CREATE{or_replace}{persistent} SECRET{name} (\n{tab}{params_sql_str}\n)""" 95 | return sql 96 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/motherduck.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from urllib.parse import parse_qs 4 | from urllib.parse import urlparse 5 | 6 | from duckdb import DuckDBPyConnection 7 | 8 | from . import BasePlugin 9 | from dbt.adapters.duckdb.__version__ import version as __plugin_version__ 10 | from dbt.adapters.duckdb.credentials import DuckDBCredentials 11 | from dbt.version import __version__ 12 | 13 | CUSTOM_USER_AGENT = "custom_user_agent" 14 | MOTHERDUCK_EXT = "motherduck" 15 | # MotherDuck config options, in order in which they need to be set 16 | # (SaaS mode is last because it locks other config options) 17 | MOTHERDUCK_CONFIG_OPTIONS = [ 18 | "motherduck_token", 19 | "motherduck_attach_mode", 20 | "motherduck_saas_mode", 21 | ] 22 | 23 | 24 | class Plugin(BasePlugin): 25 | def initialize(self, plugin_config: Dict[str, Any]): 26 | self._config = plugin_config 27 | 28 | @staticmethod 29 | def get_config_from_path(path): 30 | return {key: value[0] for key, value in parse_qs(urlparse(path).query).items()} 31 | 32 | @staticmethod 33 | def get_md_config_settings(config): 34 | # Get MotherDuck config settings 35 | md_config = {} 36 | for name in MOTHERDUCK_CONFIG_OPTIONS: 37 | for key in [ 38 | name, 39 | name.replace("motherduck_", ""), 40 | name.upper(), 41 | name.replace("motherduck_", "").upper(), 42 | ]: 43 | if key in config: 44 | md_config[name] = config[key] 45 | 46 | # Sort values (SaaS mode should be set last) 47 | return dict( 48 | sorted( 49 | md_config.items(), 50 | key=lambda x: MOTHERDUCK_CONFIG_OPTIONS.index(x[0]), 51 | ) 52 | ) 53 | 54 | def configure_connection(self, conn: DuckDBPyConnection): 55 | conn.load_extension(MOTHERDUCK_EXT) 56 | # If a MotherDuck database is in attachments, 57 | # set config options *before* attaching 58 | if self.creds is not None and self.creds.is_motherduck_attach: 59 | config = {} 60 | 61 | # add config options specified in the path 62 | for attachment in self.creds.motherduck_attach: 63 | config.update(self.get_config_from_path(attachment.path)) 64 | 65 | # add config options specified via plugin config 66 | config.update(self._config) 67 | 68 | # add config options specified via settings 69 | if self.creds.settings is not None: 70 | config.update(self.creds.settings) 71 | 72 | # set MD config options and remove from settings 73 | for key, value in self.get_md_config_settings(config).items(): 74 | conn.execute(f"SET {key} = '{value}'") 75 | if self.creds.settings is not None and key in self.creds.settings: 76 | self.creds.settings.pop(key) 77 | 78 | def update_connection_config(self, creds: DuckDBCredentials, config: Dict[str, Any]): 79 | user_agent = f"dbt/{__version__} dbt-duckdb/{__plugin_version__}" 80 | settings: Dict[str, Any] = creds.settings or {} 81 | custom_user_agent = config.get(CUSTOM_USER_AGENT) or settings.pop(CUSTOM_USER_AGENT, None) 82 | if custom_user_agent: 83 | user_agent = f"{user_agent} {custom_user_agent}" 84 | config[CUSTOM_USER_AGENT] = user_agent 85 | 86 | # If a user specified MotherDuck config options via the plugin config, 87 | # pass it to the config kwarg in duckdb.connect. 
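        # Illustrative sketch with hypothetical values: a plugin config such as
        #   {"saas_mode": True, "token": "abc"}
        # is normalized by get_md_config_settings() into
        #   {"motherduck_token": "abc", "motherduck_saas_mode": True}
        # with SaaS mode ordered last, since enabling it locks the other options.
        # These settings are only merged into the duckdb.connect(config=...) kwargs
        # when MotherDuck is not attached; the attach case applies them via the
        # SET statements in configure_connection above.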
88 | if not creds.is_motherduck_attach: 89 | config.update(self.get_md_config_settings(self._config)) 90 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+' 7 | 8 | jobs: 9 | build: 10 | name: Build distribution 📦 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v6 15 | - name: Set up Python 16 | uses: actions/setup-python@v6 17 | with: 18 | python-version: "3.x" 19 | - name: Install pypa/build 20 | run: >- 21 | python3 -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: python3 -m build 27 | - name: Store the distribution packages 28 | uses: actions/upload-artifact@v6 29 | with: 30 | name: python-package-distributions 31 | path: dist/ 32 | 33 | publish-to-pypi: 34 | name: >- 35 | Publish Python 🐍 distribution 📦 to PyPI 36 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 37 | needs: 38 | - build 39 | runs-on: ubuntu-latest 40 | environment: 41 | name: pypi 42 | url: https://pypi.org/p/dbt-duckdb 43 | permissions: 44 | id-token: write # IMPORTANT: mandatory for trusted publishing 45 | 46 | steps: 47 | - name: Download all the dists 48 | uses: actions/download-artifact@v7 49 | with: 50 | name: python-package-distributions 51 | path: dist/ 52 | - name: Publish distribution 📦 to PyPI 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | 55 | github-release: 56 | name: >- 57 | Sign the Python 🐍 distribution 📦 with Sigstore 58 | and upload them to GitHub Release 59 | needs: 60 | - publish-to-pypi 61 | runs-on: ubuntu-latest 62 | 63 | permissions: 64 | contents: write # IMPORTANT: mandatory for making GitHub Releases 65 | id-token: write # IMPORTANT: mandatory for sigstore 66 | 67 | steps: 68 | - name: Download all the dists 69 | uses: actions/download-artifact@v7 70 | with: 71 | name: python-package-distributions 72 | path: dist/ 73 | - name: Sign the dists with Sigstore 74 | uses: sigstore/gh-action-sigstore-python@v3.2.0 75 | with: 76 | inputs: >- 77 | ./dist/*.tar.gz 78 | ./dist/*.whl 79 | - name: Create GitHub Release 80 | env: 81 | GITHUB_TOKEN: ${{ github.token }} 82 | run: >- 83 | gh release create 84 | '${{ github.ref_name }}' 85 | --repo '${{ github.repository }}' 86 | --title '${{ github.ref_name }}' 87 | --generate-notes 88 | - name: Upload artifact signatures to GitHub Release 89 | env: 90 | GITHUB_TOKEN: ${{ github.token }} 91 | # Upload to GitHub Release using the `gh` CLI. 92 | # `dist/` contains the built packages, and the 93 | # sigstore-produced signatures and certificates. 
94 | run: >- 95 | gh release upload 96 | '${{ github.ref_name }}' dist/** 97 | --repo '${{ github.repository }}' 98 | 99 | notify-failure: 100 | name: Send Slack notification on failure 101 | if: failure() 102 | needs: [build, publish-to-pypi, github-release] 103 | runs-on: ubuntu-latest 104 | steps: 105 | - name: Send Slack notification 106 | uses: slackapi/slack-github-action@v2.1.1 107 | with: 108 | webhook: ${{ secrets.MOTHERDUCK_CI_NOTIFICATION_WEBHOOK }} 109 | webhook-type: webhook-trigger 110 | payload: | 111 | { 112 | "text": "dbt-duckdb release workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" 113 | } 114 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/column.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from dataclasses import field 4 | from typing import List 5 | 6 | from dbt.adapters.base.column import Column 7 | 8 | 9 | @dataclass 10 | class DuckDBColumn(Column): 11 | fields: List["DuckDBColumn"] = field(default_factory=list) 12 | 13 | def __post_init__(self): 14 | if self.is_struct(): 15 | self._parse_struct_fields() 16 | 17 | def _parse_struct_fields(self): 18 | # In DuckDB, structs are defined as STRUCT(key1 type1, key2 type2, ...) 19 | # We need to extract the key-type pairs from the struct definition 20 | # e.g., STRUCT(a VARCHAR, b INTEGER) -> ["a VARCHAR", "b INTEGER"] 21 | # We can't just split by comma, because types can contain commas 22 | # e.g. DECIMAL(10, 2) 23 | # The following logic will handle nested structs and complex types 24 | match = re.match(r"STRUCT\((.*)\)", self.dtype, re.IGNORECASE) 25 | if not match: 26 | return 27 | 28 | content = match.group(1) 29 | 30 | fields = [] 31 | paren_level = 0 32 | current_field = "" 33 | for char in content: 34 | if char == "(": 35 | paren_level += 1 36 | elif char == ")": 37 | paren_level -= 1 38 | 39 | if char == "," and paren_level == 0: 40 | fields.append(current_field.strip()) 41 | current_field = "" 42 | else: 43 | current_field += char 44 | fields.append(current_field.strip()) 45 | 46 | for f in fields: 47 | # Split on the first space to separate the name from the type 48 | parts = f.split(" ", 1) 49 | col_name = parts[0] 50 | col_type = parts[1] 51 | self.fields.append(DuckDBColumn(column=col_name, dtype=col_type)) 52 | 53 | def is_float(self): 54 | return self.dtype.lower() in { 55 | # floats 56 | "real", 57 | "float", 58 | "float4", 59 | "float8", 60 | "double", 61 | } 62 | 63 | def is_integer(self) -> bool: 64 | return self.dtype.lower() in { 65 | # signed types 66 | "tinyint", 67 | "smallint", 68 | "integer", 69 | "bigint", 70 | "hugeint", 71 | # unsigned types 72 | "utinyint", 73 | "usmallint", 74 | "uinteger", 75 | "ubigint", 76 | # aliases 77 | "int1", 78 | "int2", 79 | "int4", 80 | "int8", 81 | "short", 82 | "int", 83 | "signed", 84 | "long", 85 | } 86 | 87 | def is_struct(self) -> bool: 88 | return self.dtype.lower().startswith("struct") 89 | 90 | def flatten(self) -> List["DuckDBColumn"]: 91 | if not self.is_struct(): 92 | return [self] 93 | 94 | flat_columns: List["DuckDBColumn"] = [] 95 | for column_field in self.fields: 96 | if column_field.is_struct(): 97 | # Recursively flatten nested structs 98 | for nested_field in column_field.flatten(): 99 | flat_columns.append( 100 | DuckDBColumn( 101 | column=f"{self.column}.{nested_field.column}", 102 | dtype=nested_field.dtype, 103 | ) 104 | ) 105 | else: 
106 | flat_columns.append( 107 | DuckDBColumn( 108 | column=f"{self.column}.{column_field.column}", 109 | dtype=column_field.dtype, 110 | ) 111 | ) 112 | return flat_columns 113 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_delta.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | import pandas as pd 4 | import tempfile 5 | 6 | from dbt.tests.util import ( 7 | run_dbt, 8 | ) 9 | from deltalake.writer import write_deltalake 10 | 11 | delta_schema_yml = """ 12 | version: 2 13 | sources: 14 | - name: delta_source 15 | meta: 16 | plugin: delta 17 | tables: 18 | - name: table_1 19 | description: "An delta table" 20 | meta: 21 | delta_table_path: "{test_delta_path1}" 22 | 23 | - name: delta_source_test 24 | schema: test 25 | meta: 26 | plugin: delta 27 | tables: 28 | - name: table_2 29 | description: "An delta table" 30 | meta: 31 | delta_table_path: "{test_delta_path2}" 32 | as_of_version: 0 33 | """ 34 | 35 | 36 | delta1_sql = """ 37 | {{ config(materialized='table') }} 38 | select * from {{ source('delta_source', 'table_1') }} 39 | """ 40 | delta2_sql = """ 41 | {{ config(materialized='table') }} 42 | select * from {{ source('delta_source', 'table_1') }} limit 1 43 | """ 44 | delta3_sql = """ 45 | {{ config(materialized='table') }} 46 | select * as a from {{ source('delta_source_test', 'table_2') }} WHERE y = 'd' 47 | """ 48 | 49 | delta3_sql_expected = """ 50 | select 1 as x, 'a' as y 51 | """ 52 | 53 | 54 | @pytest.mark.skip_profile("buenavista", "md") 55 | class TestPlugins: 56 | @pytest.fixture(scope="class") 57 | def delta_test_table1(self): 58 | td = tempfile.TemporaryDirectory() 59 | path = Path(td.name) 60 | table_path = path / "test_delta_table1" 61 | 62 | df = pd.DataFrame({"x": [1, 2, 3]}) 63 | write_deltalake(table_path, df, mode="overwrite") 64 | 65 | yield table_path 66 | 67 | td.cleanup() 68 | 69 | @pytest.fixture(scope="class") 70 | def delta_test_table2(self): 71 | td = tempfile.TemporaryDirectory() 72 | path = Path(td.name) 73 | table_path = path / "test_delta_table2" 74 | 75 | df = pd.DataFrame({ 76 | "x": [1], 77 | "y": ["a"] 78 | }) 79 | write_deltalake(table_path, df, mode="overwrite") 80 | 81 | df = pd.DataFrame({ 82 | "x": [1, 2], 83 | "y": ["a","b"] 84 | }) 85 | write_deltalake(table_path, df, mode="overwrite") 86 | 87 | yield table_path 88 | 89 | td.cleanup() 90 | 91 | @pytest.fixture(scope="class") 92 | def profiles_config_update(self, dbt_profile_target): 93 | plugins = [{"module": "delta"}] 94 | return { 95 | "test": { 96 | "outputs": { 97 | "dev": { 98 | "type": "duckdb", 99 | "path": dbt_profile_target.get("path", ":memory:"), 100 | "plugins": plugins, 101 | } 102 | }, 103 | "target": "dev", 104 | } 105 | } 106 | 107 | @pytest.fixture(scope="class") 108 | def models(self, delta_test_table1,delta_test_table2): 109 | return { 110 | "source_schema.yml": delta_schema_yml.format( 111 | test_delta_path1=delta_test_table1, 112 | test_delta_path2=delta_test_table2 113 | ), 114 | "delta_table1.sql": delta1_sql, 115 | "delta_table2.sql": delta2_sql, 116 | "delta_table3.sql": delta3_sql, 117 | "delta_table3_expected.sql": delta3_sql_expected, 118 | } 119 | 120 | def test_plugins(self, project): 121 | results = run_dbt() 122 | assert len(results) == 4 123 | 124 | # check_relations_equal( 125 | # project.adapter, 126 | # [ 127 | # "delta_table3", 128 | # "delta_table3_expected", 129 | # ], 130 | # ) 131 | # res = 
project.run_sql("SELECT count(1) FROM 'delta_table3'", fetch="one") 132 | # assert res[0] == 2 133 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/relation.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from string import Template 3 | from typing import Any 4 | from typing import Optional 5 | from typing import Type 6 | 7 | from .connections import DuckDBConnectionManager 8 | from .utils import SourceConfig 9 | from dbt.adapters.base.relation import BaseRelation 10 | from dbt.adapters.contracts.relation import HasQuoting 11 | from dbt.adapters.contracts.relation import RelationConfig 12 | 13 | 14 | @dataclass(frozen=True, eq=False, repr=False) 15 | class DuckDBRelation(BaseRelation): 16 | require_alias: bool = False 17 | external: Optional[str] = None 18 | 19 | @classmethod 20 | def create_from( 21 | cls: Type["DuckDBRelation"], 22 | quoting: HasQuoting, 23 | relation_config: RelationConfig, 24 | **kwargs: Any, 25 | ) -> "DuckDBRelation": 26 | if relation_config.resource_type == "source": 27 | return cls.create_from_source(quoting, relation_config, **kwargs) 28 | else: 29 | return super().create_from(quoting, relation_config, **kwargs) 30 | 31 | @classmethod 32 | def create_from_source( 33 | cls: Type["DuckDBRelation"], quoting: HasQuoting, source: RelationConfig, **kwargs: Any 34 | ) -> "DuckDBRelation": 35 | """ 36 | This method creates a new DuckDBRelation instance from a source definition. 37 | It first checks if a 'plugin' is defined in the meta argument for the source or its parent configuration. 38 | If a 'plugin' is defined, it uses the environment associated with this run to get the name of the source that we should reference in the compiled model. 39 | If an 'external_location' is defined, it formats the location based on the 'formatter' defined in the source configuration. 40 | If the 'formatter' is not recognized, it raises a ValueError. 41 | Finally, it calls the parent class's create_from_source method to create the DuckDBRelation instance. 42 | 43 | :param cls: The class that this method is a part of. 44 | :param source: The source definition to create the DuckDBRelation from. 45 | :param kwargs: Additional keyword arguments. 46 | :return: A new DuckDBRelation instance. 47 | """ 48 | source_config = SourceConfig.create_from_source(source) 49 | # First check to see if a 'plugin' is defined in the meta argument for 50 | # the source or its parent configuration, and if it is, use the environment 51 | # associated with this run to get the name of the source that we should 52 | # reference in the compiled model 53 | if "plugin" in source_config: 54 | plugin_name = source_config["plugin"] 55 | if DuckDBConnectionManager._ENV is not None: 56 | # No connection means we are probably in the dbt parsing phase, so don't load yet. 
57 | DuckDBConnectionManager.env().load_source(plugin_name, source_config) 58 | elif "external_location" in source_config: 59 | ext_location_template = source_config["external_location"] 60 | formatter = source_config.get("formatter", "newstyle") 61 | if formatter == "newstyle": 62 | ext_location = ext_location_template.format_map(source_config.as_dict()) 63 | elif formatter == "oldstyle": 64 | ext_location = ext_location_template % source_config.as_dict() 65 | elif formatter == "template": 66 | ext_location = Template(ext_location_template).substitute(source_config.as_dict()) 67 | else: 68 | raise ValueError( 69 | f"Formatter {formatter} not recognized. Must be one of 'newstyle', 'oldstyle', or 'template'." 70 | ) 71 | 72 | # If it's a function call or already has single quotes, don't add them 73 | if "(" not in ext_location and not ext_location.startswith("'"): 74 | ext_location = f"'{ext_location}'" 75 | kwargs["external"] = ext_location 76 | 77 | return super().create_from(quoting, source, **kwargs) # type: ignore 78 | 79 | def render(self) -> str: 80 | if self.external: 81 | return self.external 82 | else: 83 | return super().render() 84 | -------------------------------------------------------------------------------- /tests/functional/plugins/test_plugins.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import sqlite3 4 | 5 | from dbt.tests.util import ( 6 | check_relations_equal, 7 | run_dbt, 8 | ) 9 | 10 | sqlalchemy_schema_yml = """ 11 | version: 2 12 | sources: 13 | - name: sql_source 14 | schema: main 15 | config: 16 | plugin: sql 17 | save_mode: ignore 18 | tables: 19 | - name: tt1 20 | description: "My first SQLAlchemy table" 21 | config: 22 | query: "SELECT * FROM {identifier} WHERE id=:id" 23 | params: 24 | id: 1 25 | - name: tt2 26 | config: 27 | table: "test_table2" 28 | """ 29 | 30 | 31 | sqlalchemy1_sql = """ 32 | select * from {{ source('sql_source', 'tt1') }} 33 | """ 34 | sqlalchemy2_sql = """ 35 | {{ config(materialized='external', plugin='sql') }} 36 | select * from {{ source('sql_source', 'tt2') }} 37 | """ 38 | plugin_sql = """ 39 | {{ config(materialized='external', plugin='cfp', key='value') }} 40 | select foo() as foo 41 | """ 42 | 43 | 44 | @pytest.mark.skip_profile("buenavista", "md") 45 | class TestPlugins: 46 | @pytest.fixture(scope="class") 47 | def sqlite_test_db(self): 48 | path = "/tmp/satest.db" 49 | db = sqlite3.connect(path) 50 | cursor = db.cursor() 51 | cursor.execute("CREATE TABLE tt1 (id int, name text)") 52 | cursor.execute("INSERT INTO tt1 VALUES (1, 'John Doe')") 53 | cursor.execute("INSERT INTO tt1 VALUES (2, 'Jane Smith')") 54 | cursor.execute("CREATE TABLE test_table2 (a int, b int, c int)") 55 | cursor.execute("INSERT INTO test_table2 VALUES (1, 2, 3), (4, 5, 6)") 56 | cursor.close() 57 | db.commit() 58 | db.close() 59 | 60 | yield path 61 | 62 | # verify that the external plugin operation works to write to the db 63 | db = sqlite3.connect(path) 64 | cursor = db.cursor() 65 | res = cursor.execute("SELECT * FROM sqlalchemy2").fetchall() 66 | assert len(res) == 2 67 | assert res[0] == (1, 2, 3) 68 | assert res[1] == (4, 5, 6) 69 | cursor.close() 70 | db.close() 71 | 72 | os.unlink(path) 73 | 74 | @pytest.fixture(scope="class") 75 | def profiles_config_update(self, dbt_profile_target, sqlite_test_db): 76 | sa_config = {"connection_url": f"sqlite:///{sqlite_test_db}"} 77 | plugins = [ 78 | {"module": "sqlalchemy", "alias": "sql", "config": sa_config}, 79 | {"module": 
"tests.create_function_plugin", "alias": "cfp"}, 80 | ] 81 | 82 | return { 83 | "test": { 84 | "outputs": { 85 | "dev": { 86 | "type": "duckdb", 87 | "path": dbt_profile_target.get("path", ":memory:"), 88 | "plugins": plugins, 89 | "retries": {"query_attempts": 2}, 90 | } 91 | }, 92 | "target": "dev", 93 | } 94 | } 95 | 96 | @pytest.fixture(scope="class") 97 | def models(self, test_data_path): 98 | return { 99 | "schema_sqlalchemy.yml": sqlalchemy_schema_yml, 100 | "sqlalchemy1.sql": sqlalchemy1_sql, 101 | "sqlalchemy2.sql": sqlalchemy2_sql, 102 | "foo.sql": plugin_sql, 103 | } 104 | 105 | def test_plugins(self, project): 106 | results = run_dbt() 107 | assert len(results) == 3 108 | 109 | res = project.run_sql("SELECT COUNT(1) FROM tt1", fetch="one") 110 | assert res[0] == 1 111 | check_relations_equal( 112 | project.adapter, 113 | [ 114 | "tt1", 115 | "sqlalchemy1", 116 | ], 117 | ) 118 | 119 | res = project.run_sql("SELECT COUNT(1) FROM tt2", fetch="one") 120 | assert res[0] == 2 121 | check_relations_equal( 122 | project.adapter, 123 | [ 124 | "tt2", 125 | "sqlalchemy2", 126 | ], 127 | ) 128 | 129 | res = project.run_sql("SELECT foo FROM foo", fetch="one") 130 | assert res[0] == 1729 131 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_ephemeral.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import pytest 5 | from dbt.tests.adapter.ephemeral.test_ephemeral import ( 6 | BaseEphemeral, 7 | BaseEphemeralMulti, 8 | ephemeral_errors__base__base_copy_sql, 9 | ephemeral_errors__base__base_sql, 10 | ephemeral_errors__dependent_sql, 11 | models_n__ephemeral_level_two_sql, 12 | models_n__ephemeral_sql, 13 | models_n__root_view_sql, 14 | models_n__source_table_sql, 15 | ) 16 | from dbt.tests.util import check_relations_equal, run_dbt 17 | 18 | 19 | class TestEphemeralMulti(BaseEphemeralMulti): 20 | def test_ephemeral_multi(self, project): 21 | db = project.database 22 | 23 | run_dbt(["seed"]) 24 | results = run_dbt(["run"]) 25 | assert len(results) == 3 26 | 27 | check_relations_equal(project.adapter, ["seed", "dependent"]) 28 | check_relations_equal(project.adapter, ["seed", "double_dependent"]) 29 | check_relations_equal(project.adapter, ["seed", "super_dependent"]) 30 | assert os.path.exists("./target/run/test/models/double_dependent.sql") 31 | with open("./target/run/test/models/double_dependent.sql", "r") as fp: 32 | sql_file = fp.read() 33 | 34 | sql_file = re.sub(r"\d+", "", sql_file) 35 | expected_sql = ( 36 | f'create view "{db}"."test_test_ephemeral"."double_dependent__dbt_tmp" as (' 37 | "with __dbt__cte__base as (" 38 | "select * from test_test_ephemeral.seed" 39 | "), __dbt__cte__base_copy as (" 40 | "select * from __dbt__cte__base" 41 | ")-- base_copy just pulls from base. 
Make sure the listed" 42 | "-- graph of CTEs all share the same dbt_cte__base cte" 43 | "select * from __dbt__cte__base where gender = 'Male'" 44 | "union all" 45 | "select * from __dbt__cte__base_copy where gender = 'Female'" 46 | ");" 47 | ) 48 | sql_file = "".join(sql_file.split()) 49 | expected_sql = "".join(expected_sql.split()) 50 | assert sql_file == expected_sql 51 | 52 | 53 | class TestEphemeralNested(BaseEphemeral): 54 | @pytest.fixture(scope="class") 55 | def models(self): 56 | return { 57 | "ephemeral_level_two.sql": models_n__ephemeral_level_two_sql, 58 | "root_view.sql": models_n__root_view_sql, 59 | "ephemeral.sql": models_n__ephemeral_sql, 60 | "source_table.sql": models_n__source_table_sql, 61 | } 62 | 63 | def test_ephemeral_nested(self, project): 64 | db = project.database 65 | 66 | results = run_dbt(["run"]) 67 | assert len(results) == 2 68 | assert os.path.exists("./target/run/test/models/root_view.sql") 69 | with open("./target/run/test/models/root_view.sql", "r") as fp: 70 | sql_file = fp.read() 71 | 72 | sql_file = re.sub(r"\d+", "", sql_file) 73 | expected_sql = ( 74 | f'create view "{db}"."test_test_ephemeral"."root_view__dbt_tmp" as (' 75 | "with __dbt__cte__ephemeral_level_two as (" 76 | f'select * from "{db}"."test_test_ephemeral"."source_table"' 77 | "), __dbt__cte__ephemeral as (" 78 | "select * from __dbt__cte__ephemeral_level_two" 79 | ")select * from __dbt__cte__ephemeral" 80 | ");" 81 | ) 82 | 83 | sql_file = "".join(sql_file.split()) 84 | expected_sql = "".join(expected_sql.split()) 85 | assert sql_file == expected_sql 86 | 87 | 88 | class TestEphemeralErrorHandling(BaseEphemeral): 89 | @pytest.fixture(scope="class") 90 | def models(self): 91 | return { 92 | "dependent.sql": ephemeral_errors__dependent_sql, 93 | "base": { 94 | "base.sql": ephemeral_errors__base__base_sql, 95 | "base_copy.sql": ephemeral_errors__base__base_copy_sql, 96 | }, 97 | } 98 | 99 | def test_ephemeral_error_handling(self, project): 100 | results = run_dbt(["run"], expect_pass=False) 101 | assert len(results) == 1 102 | assert results[0].status == "skipped" 103 | assert "Compilation Error" in results[0].message 104 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_macros.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test that the generate database name macro is case insensitive 3 | 4 | See DuckDB docs: https://duckdb.org/docs/sql/dialect/keywords_and_identifiers.html 5 | 6 | "Identifiers in DuckDB are always case-insensitive, similarly to PostgreSQL. 7 | However, unlike PostgreSQL (and some other major SQL implementations), DuckDB also 8 | treats quoted identifiers as case-insensitive." 
9 | """ 10 | from urllib.parse import urlparse 11 | import pytest 12 | 13 | from dbt.tests.util import ( 14 | run_dbt, 15 | check_result_nodes_by_name 16 | ) 17 | from tests.functional.plugins.motherduck.fixtures import ( 18 | models__gen_data_macro, 19 | macros__generate_database_name, 20 | macros__generate_schema_name, 21 | seeds__example_seed_csv, 22 | ) 23 | 24 | 25 | @pytest.mark.skip_profile("buenavista", "file", "memory") 26 | class TestMacrosGenerateDatabaseName: 27 | @pytest.fixture(scope="class") 28 | def database_name(self, dbt_profile_target, request): 29 | return urlparse(dbt_profile_target["path"]).path + "_ducky_ducky" 30 | 31 | @pytest.fixture(autouse=True) 32 | def run_dbt_scope(self, project, database_name): 33 | project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}") 34 | yield 35 | project.run_sql(f"DROP DATABASE {database_name}") 36 | 37 | @pytest.fixture(scope="class") 38 | def seeds(self): 39 | return { 40 | "seed.csv": seeds__example_seed_csv, 41 | } 42 | 43 | @pytest.fixture(scope="class") 44 | def models(self): 45 | return { 46 | "model.sql": models__gen_data_macro 47 | } 48 | 49 | @pytest.fixture(scope="class") 50 | def macros(self): 51 | return { 52 | "db_name.sql": macros__generate_database_name, 53 | "schema_name.sql": macros__generate_schema_name 54 | } 55 | 56 | @staticmethod 57 | def gen_project_config_update(build_env, org_prefix): 58 | return { 59 | "config-version": 2, 60 | "vars": { 61 | "test": { 62 | "build_env": build_env, 63 | "org_prefix": org_prefix 64 | }, 65 | }, 66 | "macro-paths": ["macros"], 67 | } 68 | 69 | @pytest.fixture(scope="class") 70 | def project_config_update(self): 71 | return self.gen_project_config_update("ducky", "ducky") 72 | 73 | def test_dbname_macro(self, project): 74 | # seed command 75 | results = run_dbt(["seed"]) 76 | assert len(results) == 1 77 | check_result_nodes_by_name(results, ["seed"]) 78 | 79 | for _ in range(3): 80 | results = run_dbt(["run"]) 81 | assert len(results) == 1 82 | check_result_nodes_by_name(results, ["model"]) 83 | 84 | 85 | @pytest.mark.skip_profile("buenavista", "file", "memory") 86 | class TestMacrosGenerateDatabaseNameUpperCase(TestMacrosGenerateDatabaseName): 87 | @pytest.fixture(scope="class") 88 | def database_name(self, dbt_profile_target, request): 89 | return urlparse(dbt_profile_target["path"]).path + "_ducky_ducky" 90 | 91 | @pytest.fixture(scope="class") 92 | def project_config_update(self): 93 | return self.gen_project_config_update("DUCKY", "DUCKY") 94 | 95 | 96 | @pytest.mark.skip_profile("buenavista", "file", "memory") 97 | class TestMacrosGenerateDatabaseNameLowerCase(TestMacrosGenerateDatabaseName): 98 | @pytest.fixture(scope="class") 99 | def database_name(self, dbt_profile_target, request): 100 | return urlparse(dbt_profile_target["path"]).path + "_DUCKY_DUCKY" 101 | 102 | @pytest.fixture(scope="class") 103 | def project_config_update(self): 104 | return self.gen_project_config_update("ducky", "ducky") 105 | 106 | 107 | @pytest.mark.skip_profile("buenavista", "file", "memory") 108 | class TestMacrosGenerateDatabaseNameAllMixedCase(TestMacrosGenerateDatabaseName): 109 | @pytest.fixture(scope="class") 110 | def database_name(self, dbt_profile_target, request): 111 | return urlparse(dbt_profile_target["path"]).path + "_dUcKy_DUckY" 112 | 113 | @pytest.fixture(scope="class") 114 | def project_config_update(self): 115 | return self.gen_project_config_update("DuCkY", "dUcKy") 116 | -------------------------------------------------------------------------------- 
/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import resource 3 | import subprocess 4 | import time 5 | from importlib import metadata 6 | 7 | import duckdb 8 | import pytest 9 | 10 | # Increase the number of open files allowed 11 | # Hack for https://github.com/dbt-labs/dbt-core/issues/7316 12 | soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE) 13 | resource.setrlimit(resource.RLIMIT_NOFILE, (hard_limit, hard_limit)) 14 | 15 | # Import the standard functional fixtures as a plugin 16 | # Note: fixtures with session scope need to be local 17 | pytest_plugins = ["dbt.tests.fixtures.project"] 18 | 19 | MOTHERDUCK_TOKEN = "MOTHERDUCK_TOKEN" 20 | TEST_MOTHERDUCK_TOKEN = "TEST_MOTHERDUCK_TOKEN" 21 | 22 | 23 | def pytest_addoption(parser): 24 | parser.addoption("--profile", action="store", default="memory", type=str) 25 | 26 | 27 | def pytest_report_header() -> list[str]: 28 | """Return a list of strings to be displayed in the header of the report.""" 29 | return [ 30 | f"duckdb: {metadata.version('duckdb')}", 31 | f"dbt-core: {metadata.version('dbt-core')}", 32 | ] 33 | 34 | 35 | @pytest.fixture(scope="session") 36 | def profile_type(request): 37 | return request.config.getoption("--profile") 38 | 39 | 40 | @pytest.fixture(scope="session") 41 | def bv_server_process(profile_type): 42 | if profile_type == "buenavista": 43 | server_process = subprocess.Popen(["python3", "-m", "tests.bv_test_server"]) 44 | 45 | # Wait for the server to be ready 46 | time.sleep(5) 47 | 48 | # Pass the server process to the tests 49 | yield server_process 50 | 51 | # Teardown: Stop the server process after tests are done 52 | server_process.terminate() 53 | server_process.wait() 54 | else: 55 | yield None 56 | 57 | 58 | # The profile dictionary, used to write out profiles.yml 59 | # dbt will supply a unique schema per test, so we do not specify 'schema' here 60 | @pytest.fixture(scope="session") 61 | def dbt_profile_target(profile_type, bv_server_process, tmpdir_factory): 62 | profile = {"type": "duckdb", "threads": 4} 63 | 64 | if profile_type == "buenavista": 65 | profile["database"] = "memory" 66 | profile["remote"] = { 67 | "host": "127.0.0.1", 68 | "port": 5433, 69 | "user": "test", 70 | } 71 | elif profile_type == "file": 72 | profile["path"] = str(tmpdir_factory.mktemp("dbs") / "tmp.db") 73 | elif profile_type == "md": 74 | # Test against MotherDuck 75 | if MOTHERDUCK_TOKEN not in os.environ and MOTHERDUCK_TOKEN.lower() not in os.environ: 76 | if TEST_MOTHERDUCK_TOKEN not in os.environ: 77 | raise ValueError( 78 | f"Please set the {MOTHERDUCK_TOKEN} or {TEST_MOTHERDUCK_TOKEN} \ 79 | environment variable to run tests against MotherDuck" 80 | ) 81 | profile["token"] = os.environ.get(TEST_MOTHERDUCK_TOKEN) 82 | else: 83 | profile["token"] = os.environ.get(MOTHERDUCK_TOKEN, os.environ.get(MOTHERDUCK_TOKEN.lower())) 84 | profile["disable_transactions"] = True 85 | profile["path"] = "md:test" 86 | elif profile_type in ["memory", "nightly"]: 87 | pass # use the default path-less profile 88 | else: 89 | raise ValueError(f"Invalid profile type '{profile_type}'") 90 | 91 | return profile 92 | 93 | 94 | @pytest.fixture(autouse=True, scope="class") 95 | def skip_by_profile_type(profile_type, request): 96 | if request.node.get_closest_marker("skip_profile"): 97 | for skip_profile_type in request.node.get_closest_marker("skip_profile").args: 98 | if skip_profile_type == profile_type: 99 | pytest.skip(f"skipped on '{profile_type}' 
profile") 100 | 101 | 102 | @pytest.fixture(scope="session") 103 | def test_data_path(): 104 | test_dir = os.path.dirname(os.path.abspath(__file__)) 105 | return os.path.join(test_dir, "data") 106 | 107 | 108 | def pytest_collection_modifyitems(config, items): 109 | # Skip the S3 tests if the secrets are not available 110 | if not ( 111 | os.getenv("S3_MD_ORG_KEY") and os.getenv("S3_MD_ORG_REGION") and os.getenv("S3_MD_ORG_SECRET") 112 | ): 113 | skip_s3 = pytest.mark.skip(reason="need S3 credentials to run this test") 114 | for item in items: 115 | if "with_s3_creds" in item.keywords: 116 | item.add_marker(skip_s3) 117 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.utils.test_any_value import BaseAnyValue 4 | from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend 5 | from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat 6 | from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct 7 | from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr 8 | from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText 9 | from dbt.tests.adapter.utils.test_concat import BaseConcat 10 | from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive 11 | from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc 12 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 13 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 14 | from dbt.tests.adapter.utils.test_date_spine import BaseDateSpine 15 | from dbt.tests.adapter.utils.test_escape_single_quotes import ( 16 | BaseEscapeSingleQuotesQuote, 17 | ) 18 | from dbt.tests.adapter.utils.test_except import BaseExcept 19 | from dbt.tests.adapter.utils.test_generate_series import BaseGenerateSeries 20 | from dbt.tests.adapter.utils.test_get_intervals_between import BaseGetIntervalsBetween 21 | from dbt.tests.adapter.utils.test_get_powers_of_two import BaseGetPowersOfTwo 22 | from dbt.tests.adapter.utils.test_hash import BaseHash 23 | from dbt.tests.adapter.utils.test_intersect import BaseIntersect 24 | from dbt.tests.adapter.utils.test_last_day import BaseLastDay 25 | from dbt.tests.adapter.utils.test_length import BaseLength 26 | from dbt.tests.adapter.utils.test_listagg import BaseListagg 27 | from dbt.tests.adapter.utils.test_position import BasePosition 28 | from dbt.tests.adapter.utils.test_replace import BaseReplace 29 | from dbt.tests.adapter.utils.test_right import BaseRight 30 | from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast 31 | from dbt.tests.adapter.utils.test_split_part import BaseSplitPart 32 | from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral 33 | 34 | 35 | class TestAnyValue(BaseAnyValue): 36 | pass 37 | 38 | 39 | class TestBoolOr(BaseBoolOr): 40 | pass 41 | 42 | 43 | class TestCastBoolToText(BaseCastBoolToText): 44 | pass 45 | 46 | 47 | class TestConcat(BaseConcat): 48 | pass 49 | 50 | 51 | class TestDateAdd(BaseDateAdd): 52 | pass 53 | 54 | 55 | class TestDateDiff(BaseDateDiff): 56 | pass 57 | 58 | 59 | # Skipping this b/c the upstream utils test 60 | # is irritatingly adapter-specific at the moment 61 | @pytest.mark.skip 62 | class TestDateSpine(BaseDateSpine): 63 | pass 64 | 65 | class TestDateTrunc(BaseDateTrunc): 66 | pass 67 | 68 | 69 | class 
TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote): 70 | pass 71 | 72 | 73 | class TestGenerateSeries(BaseGenerateSeries): 74 | pass 75 | 76 | 77 | # Skipping this b/c the upstream utils test 78 | # is irritatingly adapter-specific at the moment 79 | @pytest.mark.skip 80 | class TestGetIntervalsBetween(BaseGetIntervalsBetween): 81 | pass 82 | 83 | 84 | class TestGetPowersOfTwo(BaseGetPowersOfTwo): 85 | pass 86 | 87 | 88 | class TestExcept(BaseExcept): 89 | pass 90 | 91 | 92 | class TestHash(BaseHash): 93 | pass 94 | 95 | 96 | class TestIntersect(BaseIntersect): 97 | pass 98 | 99 | 100 | class TestLastDay(BaseLastDay): 101 | pass 102 | 103 | 104 | class TestLength(BaseLength): 105 | pass 106 | 107 | 108 | # NOTE: list_agg relies on an ORDER BY construct MD does not yet support 109 | @pytest.mark.skip_profile("md") 110 | class TestListagg(BaseListagg): 111 | pass 112 | 113 | 114 | class TestPosition(BasePosition): 115 | pass 116 | 117 | 118 | class TestReplace(BaseReplace): 119 | pass 120 | 121 | 122 | class TestRight(BaseRight): 123 | pass 124 | 125 | 126 | class TestSafeCast(BaseSafeCast): 127 | pass 128 | 129 | 130 | class TestSplitPart(BaseSplitPart): 131 | pass 132 | 133 | 134 | class TestStringLiteral(BaseStringLiteral): 135 | pass 136 | 137 | 138 | class TestArrayAppend(BaseArrayAppend): 139 | pass 140 | 141 | 142 | class TestArrayConcat(BaseArrayConcat): 143 | pass 144 | 145 | 146 | class TestArrayConstruct(BaseArrayConstruct): 147 | pass 148 | 149 | 150 | # Skipping this while we sort out what the right default is 151 | # here: https://github.com/duckdb/duckdb/issues/7934 152 | @pytest.mark.skip 153 | class TestCurrentTimestamp(BaseCurrentTimestampNaive): 154 | pass 155 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_write_options.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.adapter.basic.files import ( 4 | base_table_sql, 5 | model_base, 6 | schema_base_yml, 7 | seeds_base_csv, 8 | ) 9 | from dbt.tests.util import ( 10 | check_relation_types, 11 | check_relations_equal, 12 | check_result_nodes_by_name, 13 | relation_from_name, 14 | run_dbt, 15 | ) 16 | 17 | config_write_csv_delim_options = """ 18 | {{ config(materialized="external", format="csv", options={"delimiter": "|"}) }} 19 | """ 20 | 21 | config_write_codec_options = """ 22 | {{ config(materialized="external", options={"codec": "zstd"}) }} 23 | """ 24 | 25 | config_write_partition_by_id = """ 26 | {{ config(materialized="external", options={"partition_by": "id", "codec": "zstd"}) }} 27 | """ 28 | 29 | config_write_partition_by_id_name = """ 30 | {{ config(materialized="external", options={"partition_by": "id, name"}) }} 31 | """ 32 | 33 | csv_delim_options_sql = config_write_csv_delim_options + model_base 34 | write_codec_options = config_write_codec_options + model_base 35 | config_write_partition_by_id_sql = config_write_partition_by_id + model_base 36 | config_write_partition_by_id_name_sql = config_write_partition_by_id_name + model_base 37 | 38 | 39 | class BaseExternalMaterializations: 40 | 41 | @pytest.fixture(scope="class") 42 | def dbt_profile_target(self, dbt_profile_target, tmp_path_factory): 43 | extroot = str(tmp_path_factory.getbasetemp() / "write_options") 44 | os.mkdir(extroot) 45 | dbt_profile_target["external_root"] = extroot 46 | return dbt_profile_target 47 | 48 | @pytest.fixture(scope="class") 49 | def models(self): 50 | return { 51 | 
"table_model.sql": base_table_sql, 52 | "csv_delim_options.sql": csv_delim_options_sql, 53 | "write_codec_options.sql": write_codec_options, 54 | "config_write_partition_by_id.sql": config_write_partition_by_id_sql, 55 | "config_write_partition_by_id_name.sql": config_write_partition_by_id_name_sql, 56 | "schema.yml": schema_base_yml, 57 | } 58 | 59 | @pytest.fixture(scope="class") 60 | def seeds(self): 61 | return { 62 | "base.csv": seeds_base_csv, 63 | } 64 | 65 | @pytest.fixture(scope="class") 66 | def project_config_update(self): 67 | return { 68 | "name": "base", 69 | } 70 | 71 | def test_base(self, project): 72 | 73 | # seed command 74 | results = run_dbt(["seed"]) 75 | # seed result length 76 | assert len(results) == 1 77 | 78 | # run command 79 | results = run_dbt() 80 | # run result length 81 | assert len(results) == 5 82 | 83 | # names exist in result nodes 84 | check_result_nodes_by_name( 85 | results, 86 | [ 87 | "table_model", 88 | "csv_delim_options", 89 | "write_codec_options", 90 | "config_write_partition_by_id", 91 | "config_write_partition_by_id_name", 92 | ], 93 | ) 94 | 95 | # check relation types 96 | expected = { 97 | "base": "table", 98 | "table_model": "table", 99 | "csv_delim_options": "view", 100 | "write_codec_options": "view", 101 | "config_write_partition_by_id": "view", 102 | "config_write_partition_by_id_name": "view", 103 | } 104 | check_relation_types(project.adapter, expected) 105 | 106 | # base table rowcount 107 | relation = relation_from_name(project.adapter, "base") 108 | result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") 109 | assert result[0] == 10 110 | 111 | # relations_equal 112 | check_relations_equal( 113 | project.adapter, 114 | [ 115 | "base", 116 | "csv_delim_options", 117 | "write_codec_options", 118 | "config_write_partition_by_id", 119 | "config_write_partition_by_id_name", 120 | ], 121 | ) 122 | 123 | # check relations in catalog 124 | catalog = run_dbt(["docs", "generate"]) 125 | assert len(catalog.nodes) == 6 126 | assert len(catalog.sources) == 1 127 | 128 | 129 | class TestExternalMaterializations(BaseExternalMaterializations): 130 | pass 131 | -------------------------------------------------------------------------------- /tests/functional/adapter/indexes/test_indexes.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import re 3 | from dbt.tests.util import ( 4 | run_dbt, 5 | run_dbt_and_capture, 6 | ) 7 | from tests.functional.adapter.indexes.fixtures import ( 8 | models__incremental_sql, 9 | models__table_sql, 10 | seeds__seed_csv, 11 | snapshots__colors_sql, 12 | ) 13 | 14 | 15 | INDEX_DEFINITION_PATTERN = re.compile(r"\((.*?)\)") 16 | 17 | 18 | class TestIndex: 19 | @pytest.fixture(scope="class") 20 | def models(self): 21 | return { 22 | "table.sql": models__table_sql, 23 | "incremental.sql": models__incremental_sql, 24 | } 25 | 26 | @pytest.fixture(scope="class") 27 | def seeds(self): 28 | return {"seed.csv": seeds__seed_csv} 29 | 30 | @pytest.fixture(scope="class") 31 | def snapshots(self): 32 | return {"colors.sql": snapshots__colors_sql} 33 | 34 | @pytest.fixture(scope="class") 35 | def project_config_update(self): 36 | return { 37 | "config-version": 2, 38 | "seeds": { 39 | "quote_columns": False, 40 | "indexes": [ 41 | {"columns": ["country_code"], "unique": False}, 42 | {"columns": ["country_code", "country_name"], "unique": True}, 43 | ], 44 | }, 45 | "vars": { 46 | "version": 1, 47 | }, 48 | } 49 | 50 | def test_table(self, 
project, unique_schema): 51 | for _ in range(2): 52 | results = run_dbt(["run", "--models", "table"]) 53 | assert len(results) == 1 54 | 55 | indexes = self.get_indexes("table", project, unique_schema) 56 | expected = [ 57 | {"columns": "column_a", "unique": False}, 58 | {"columns": "column_b", "unique": False}, 59 | {"columns": "column_a, column_b", "unique": False}, 60 | {"columns": "column_b, column_a", "unique": True}, 61 | {"columns": "column_a", "unique": False}, 62 | ] 63 | assert len(indexes) == len(expected) 64 | 65 | def test_incremental(self, project, unique_schema): 66 | for additional_argument in [[], [], ["--full-refresh"]]: 67 | results = run_dbt(["run", "--models", "incremental"] + additional_argument) 68 | assert len(results) == 1 69 | 70 | indexes = self.get_indexes("incremental", project, unique_schema) 71 | expected = [ 72 | {"columns": "column_a", "unique": False}, 73 | {"columns": "column_a, column_b", "unique": True}, 74 | ] 75 | assert len(indexes) == len(expected) 76 | 77 | def test_seed(self, project, unique_schema): 78 | for additional_argument in [[], [], ["--full-refresh"]]: 79 | results = run_dbt(["seed"] + additional_argument) 80 | assert len(results) == 1 81 | 82 | indexes = self.get_indexes("seed", project, unique_schema) 83 | expected = [ 84 | {"columns": "country_code", "unique": False}, 85 | { 86 | "columns": "country_code, country_name", 87 | "unique": True, 88 | }, 89 | ] 90 | assert len(indexes) == len(expected) 91 | 92 | def test_snapshot(self, project, unique_schema): 93 | for version in [1, 2]: 94 | results = run_dbt(["snapshot", "--vars", f"version: {version}"]) 95 | assert len(results) == 1 96 | 97 | indexes = self.get_indexes("colors", project, unique_schema) 98 | expected = [ 99 | {"columns": "id", "unique": False}, 100 | {"columns": "id, color", "unique": True}, 101 | ] 102 | assert len(indexes) == len(expected) 103 | 104 | def get_indexes(self, table_name, project, unique_schema): 105 | sql = f""" 106 | SELECT 107 | sql as index_definition, is_unique 108 | FROM duckdb_indexes() 109 | WHERE 110 | schema_name = '{unique_schema}' 111 | AND 112 | table_name = '{table_name}' 113 | """ 114 | results = project.run_sql(sql, fetch="all") 115 | return [self.parse_index_definition(row[0], row[1]) for row in results] 116 | 117 | def parse_index_definition(self, index_definition, is_unique): 118 | index_definition = index_definition.lower() 119 | m = INDEX_DEFINITION_PATTERN.search(index_definition) 120 | return { 121 | "columns": m.group(1), 122 | "unique": is_unique, 123 | } 124 | 125 | 126 | -------------------------------------------------------------------------------- /tests/unit/utils.py: -------------------------------------------------------------------------------- 1 | """Unit test utility functions. 2 | 3 | Note that all imports should be inside the functions to avoid import/mocking 4 | issues. 5 | """ 6 | import os 7 | from unittest import mock 8 | 9 | from dbt.config.project import PartialProject 10 | 11 | 12 | def normalize(path): 13 | """On windows, neither is enough on its own: 14 | 15 | >>> normcase('C:\\documents/ALL CAPS/subdir\\..') 16 | 'c:\\documents\\all caps\\subdir\\..' 
17 | >>> normpath('C:\\documents/ALL CAPS/subdir\\..') 18 | 'C:\\documents\\ALL CAPS' 19 | >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..')) 20 | 'c:\\documents\\all caps' 21 | """ 22 | return os.path.normcase(os.path.normpath(path)) 23 | 24 | 25 | class Obj: 26 | which = "blah" 27 | single_threaded = False 28 | 29 | 30 | def mock_connection(name): 31 | conn = mock.MagicMock() 32 | conn.name = name 33 | return conn 34 | 35 | 36 | def profile_from_dict(profile, profile_name, cli_vars="{}"): 37 | from dbt.config import Profile 38 | from dbt.config.renderer import ProfileRenderer 39 | from dbt.config.utils import parse_cli_vars 40 | 41 | if not isinstance(cli_vars, dict): 42 | cli_vars = parse_cli_vars(cli_vars) 43 | 44 | renderer = ProfileRenderer(cli_vars) 45 | return Profile.from_raw_profile_info( 46 | profile, 47 | profile_name, 48 | renderer, 49 | ) 50 | 51 | 52 | def project_from_dict(project, profile, packages=None, selectors=None, cli_vars="{}"): 53 | from dbt.config.renderer import DbtProjectYamlRenderer 54 | from dbt.config.utils import parse_cli_vars 55 | 56 | if not isinstance(cli_vars, dict): 57 | cli_vars = parse_cli_vars(cli_vars) 58 | 59 | renderer = DbtProjectYamlRenderer(profile, cli_vars) 60 | 61 | project_root = project.pop("project-root", os.getcwd()) 62 | 63 | partial = PartialProject.from_dicts( 64 | project_root=project_root, 65 | project_dict=project, 66 | packages_dict=packages, 67 | selectors_dict=selectors, 68 | ) 69 | return partial.render(renderer) 70 | 71 | 72 | def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"): 73 | from copy import deepcopy 74 | 75 | from dbt.config import Profile, Project, RuntimeConfig 76 | 77 | if isinstance(project, Project): 78 | profile_name = project.profile_name 79 | else: 80 | profile_name = project.get("profile") 81 | 82 | if not isinstance(profile, Profile): 83 | profile = profile_from_dict( 84 | deepcopy(profile), 85 | profile_name, 86 | cli_vars, 87 | ) 88 | 89 | if not isinstance(project, Project): 90 | project = project_from_dict( 91 | deepcopy(project), 92 | profile, 93 | packages, 94 | selectors, 95 | cli_vars, 96 | ) 97 | 98 | args = Obj() 99 | args.vars = cli_vars 100 | args.profile_dir = "/dev/null" 101 | return RuntimeConfig.from_parts(project=project, profile=profile, args=args) 102 | 103 | 104 | def inject_plugin(plugin): 105 | from dbt.adapters.factory import FACTORY 106 | 107 | key = plugin.adapter.type() 108 | FACTORY.plugins[key] = plugin 109 | 110 | 111 | def inject_adapter(value, plugin): 112 | """Inject the given adapter into the adapter factory, so your hand-crafted 113 | artisanal adapter will be available from get_adapter() as if dbt loaded it. 
114 | """ 115 | inject_plugin(plugin) 116 | from dbt.adapters.factory import FACTORY 117 | 118 | key = value.type() 119 | FACTORY.adapters[key] = value 120 | 121 | 122 | def generate_name_macros(package): 123 | from dbt.contracts.graph.parsed import ParsedMacro 124 | from dbt.node_types import NodeType 125 | 126 | name_sql = {} 127 | for component in ("database", "schema", "alias"): 128 | if component == "alias": 129 | source = "node.name" 130 | else: 131 | source = f"target.{component}" 132 | name = f"generate_{component}_name" 133 | sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}" 134 | name_sql[name] = sql 135 | 136 | all_sql = "\n".join(name_sql.values()) 137 | for name, sql in name_sql.items(): 138 | pm = ParsedMacro( 139 | name=name, 140 | resource_type=NodeType.Macro, 141 | unique_id=f"macro.{package}.{name}", 142 | package_name=package, 143 | original_file_path=normalize("macros/macro.sql"), 144 | root_path="./dbt_modules/root", 145 | path=normalize("macros/macro.sql"), 146 | raw_sql=all_sql, 147 | macro_sql=sql, 148 | ) 149 | yield pm 150 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/plugins/postgres.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from typing import List 4 | from typing import Optional 5 | from typing import Tuple 6 | 7 | from duckdb import DuckDBPyConnection 8 | 9 | from . import BasePlugin 10 | from dbt.adapters.events.logging import AdapterLogger 11 | 12 | PG_EXT = "postgres" 13 | 14 | 15 | class Plugin(BasePlugin): 16 | logger = AdapterLogger("DuckDB_PostgresPlugin") 17 | 18 | def __init__(self, name: str, plugin_config: Dict[str, Any]): 19 | """ 20 | Initialize the Plugin with a name and configuration. 21 | """ 22 | super().__init__(name, plugin_config) 23 | self.logger.debug( 24 | "Plugin __init__ called with name: %s and config: %s", name, plugin_config 25 | ) 26 | self.initialize(plugin_config) 27 | 28 | def initialize(self, config: Dict[str, Any]): 29 | """ 30 | Initialize the plugin with the provided configuration. 31 | """ 32 | self.logger.debug("Initializing PostgreSQL plugin with config: %s", config) 33 | 34 | self._dsn: str = config["dsn"] 35 | if not self._dsn: 36 | self.logger.error( 37 | "Initialization failed: 'dsn' is a required argument for the postgres plugin!" 38 | ) 39 | raise ValueError("'dsn' is a required argument for the postgres plugin!") 40 | 41 | self._pg_schema: Optional[str] = config.get("pg_schema") # Can be None 42 | self._duckdb_alias: str = config.get("duckdb_alias", "postgres_db") 43 | self._read_only: bool = config.get("read_only", False) 44 | self._secret: Optional[str] = config.get("secret") 45 | self._attach_options: Dict[str, Any] = config.get( 46 | "attach_options", {} 47 | ) # Additional ATTACH options 48 | self._settings: Dict[str, Any] = config.get( 49 | "settings", {} 50 | ) # Extension settings via SET commands 51 | 52 | self.logger.info( 53 | "PostgreSQL plugin initialized with dsn='%s', pg_schema='%s', " 54 | "duckdb_alias='%s', read_only=%s, secret='%s'", 55 | self._dsn, 56 | self._pg_schema, 57 | self._duckdb_alias, 58 | self._read_only, 59 | self._secret, 60 | ) 61 | 62 | def configure_connection(self, conn: DuckDBPyConnection): 63 | """ 64 | Configure the DuckDB connection to attach the PostgreSQL database. 
65 | """ 66 | self.logger.debug("Configuring DuckDB connection for PostgreSQL plugin.") 67 | 68 | conn.install_extension(PG_EXT) 69 | conn.load_extension(PG_EXT) 70 | self.logger.info("PostgreSQL extension installed and loaded.") 71 | 72 | # Set any extension settings provided 73 | self._set_extension_settings(conn) 74 | 75 | # Build and execute the ATTACH command 76 | attach_stmt = self._build_attach_statement() 77 | self.logger.debug("Executing ATTACH statement: %s", attach_stmt) 78 | try: 79 | conn.execute(attach_stmt) 80 | self.logger.info("Successfully attached PostgreSQL database with DSN: %s", self._dsn) 81 | except Exception as e: 82 | self.logger.error("Failed to attach PostgreSQL database: %s", e) 83 | raise 84 | 85 | def _set_extension_settings(self, conn: DuckDBPyConnection): 86 | """ 87 | Set extension settings via SET commands. 88 | """ 89 | for setting, value in self._settings.items(): 90 | # Quote string values 91 | if isinstance(value, str): 92 | value = f"'{value}'" 93 | elif isinstance(value, bool): 94 | value = "true" if value else "false" 95 | set_stmt = f"SET {setting} = {value};" 96 | self.logger.debug("Setting extension option: %s", set_stmt) 97 | try: 98 | conn.execute(set_stmt) 99 | except Exception as e: 100 | self.logger.error("Failed to set option %s: %s", setting, e) 101 | raise 102 | 103 | def _build_attach_statement(self) -> str: 104 | """ 105 | Build the ATTACH statement for connecting to the PostgreSQL database. 106 | """ 107 | attach_options: List[Tuple[str, Optional[str]]] = [("TYPE", "POSTGRES")] 108 | 109 | if self._pg_schema: 110 | attach_options.append(("SCHEMA", f"'{self._pg_schema}'")) 111 | 112 | if self._secret: 113 | attach_options.append(("SECRET", f"'{self._secret}'")) 114 | 115 | # Additional attach options 116 | for k, v in self._attach_options.items(): 117 | if isinstance(v, bool): 118 | v = "true" if v else "false" 119 | elif isinstance(v, str): 120 | v = f"'{v}'" 121 | attach_options.append((k.upper(), v)) 122 | 123 | if self._read_only: 124 | attach_options.append(("READ_ONLY", None)) # No value assigned 125 | 126 | # Convert options to string 127 | attach_options_str = ", ".join( 128 | f"{k} {v}" if v is not None else k for k, v in attach_options 129 | ) 130 | 131 | attach_stmt = f"ATTACH '{self._dsn}' AS {self._duckdb_alias} ({attach_options_str});" 132 | return attach_stmt 133 | -------------------------------------------------------------------------------- /dbt/adapters/duckdb/connections.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import threading 3 | from contextlib import contextmanager 4 | from multiprocessing.context import SpawnContext 5 | from typing import Optional 6 | from typing import Set 7 | from typing import Tuple 8 | from typing import TYPE_CHECKING 9 | 10 | import dbt.exceptions 11 | from . 
import environments 12 | from dbt.adapters.contracts.connection import AdapterRequiredConfig 13 | from dbt.adapters.contracts.connection import AdapterResponse 14 | from dbt.adapters.contracts.connection import Connection 15 | from dbt.adapters.contracts.connection import ConnectionState 16 | from dbt.adapters.events.logging import AdapterLogger 17 | from dbt.adapters.sql import SQLConnectionManager 18 | 19 | logger = AdapterLogger("DuckDB") 20 | 21 | if TYPE_CHECKING: 22 | import agate 23 | 24 | 25 | class DuckDBConnectionManager(SQLConnectionManager): 26 | TYPE = "duckdb" 27 | _LOCK = threading.RLock() 28 | _ENV = None 29 | _LOGGED_MESSAGES: Set[str] = set() 30 | 31 | def __init__(self, config: AdapterRequiredConfig, mp_context: SpawnContext) -> None: 32 | super().__init__(config, mp_context) 33 | self.disable_transactions = config.credentials.disable_transactions # type: ignore 34 | 35 | @classmethod 36 | def env(cls) -> environments.Environment: 37 | with cls._LOCK: 38 | if not cls._ENV: 39 | raise Exception("DuckDBConnectionManager environment requested before creation!") 40 | return cls._ENV 41 | 42 | @classmethod 43 | def open(cls, connection: Connection) -> Connection: 44 | if connection.state == ConnectionState.OPEN: 45 | logger.debug("Connection is already open, skipping open.") 46 | return connection 47 | 48 | credentials = cls.get_credentials(connection.credentials) 49 | with cls._LOCK: 50 | try: 51 | if not cls._ENV or cls._ENV.creds != credentials: 52 | cls._ENV = environments.create(credentials) 53 | connection.handle = cls._ENV.handle() 54 | connection.state = ConnectionState.OPEN 55 | 56 | except RuntimeError as e: 57 | logger.debug("Got an error when attempting to connect to DuckDB: '{}'".format(e)) 58 | connection.handle = None 59 | connection.state = ConnectionState.FAIL 60 | raise dbt.adapters.exceptions.FailedToConnectError(str(e)) 61 | 62 | return connection 63 | 64 | @classmethod 65 | def close(cls, connection: Connection) -> Connection: 66 | # if the connection is in closed or init, there's nothing to do 67 | if connection.state in {ConnectionState.CLOSED, ConnectionState.INIT}: 68 | return connection 69 | 70 | connection = super(SQLConnectionManager, cls).close(connection) 71 | return connection 72 | 73 | @classmethod 74 | def warn_once(cls, msg: str): 75 | """Post a warning message once per dbt execution.""" 76 | with cls._LOCK: 77 | if msg in cls._LOGGED_MESSAGES: 78 | return 79 | cls._LOGGED_MESSAGES.add(msg) 80 | logger.warning(msg) 81 | 82 | def cancel(self, connection: Connection): 83 | if self._ENV is not None: 84 | logger.debug( 85 | "cancelling query on connection {}. 
Details: {}".format( 86 | connection.name, connection 87 | ) 88 | ) 89 | self._ENV.cancel(connection) 90 | logger.debug("query cancelled on connection {}".format(connection.name)) 91 | 92 | @contextmanager 93 | def exception_handler(self, sql: str, connection_name="master"): 94 | try: 95 | yield 96 | except dbt.exceptions.DbtRuntimeError: 97 | raise 98 | except RuntimeError as e: 99 | logger.debug("duckdb error: {}".format(str(e))) 100 | logger.debug("Error running SQL: {}".format(sql)) 101 | # Preserve original RuntimeError with full context instead of swallowing 102 | raise dbt.exceptions.DbtRuntimeError(str(e)) from e 103 | except Exception as exc: 104 | logger.debug("duckdb error: {}".format(str(exc))) 105 | logger.debug("Error running SQL: {}".format(sql)) 106 | logger.debug("Rolling back transaction.") 107 | raise dbt.exceptions.DbtRuntimeError(str(exc)) from exc 108 | 109 | @classmethod 110 | def get_credentials(cls, credentials): 111 | return credentials 112 | 113 | @classmethod 114 | def get_response(cls, cursor) -> AdapterResponse: 115 | # https://github.com/dbt-labs/dbt-spark/issues/142 116 | message = "OK" 117 | return AdapterResponse(_message=message) 118 | 119 | @classmethod 120 | def close_all_connections(cls): 121 | with cls._LOCK: 122 | if cls._ENV is not None: 123 | cls._ENV = None 124 | 125 | def execute( 126 | self, 127 | sql: str, 128 | auto_begin: bool = False, 129 | fetch: bool = False, 130 | limit: Optional[int] = None, 131 | ) -> Tuple[AdapterResponse, "agate.Table"]: 132 | if self.disable_transactions: 133 | auto_begin = False 134 | return super().execute(sql, auto_begin, fetch, limit) 135 | 136 | 137 | atexit.register(DuckDBConnectionManager.close_all_connections) 138 | -------------------------------------------------------------------------------- /tests/functional/plugins/motherduck/test_motherduck_attach.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import ( 3 | run_dbt, 4 | ) 5 | 6 | random_logs_sql = """ 7 | {{ config(materialized='table', meta=dict(temp_schema_name='dbt_temp_test')) }} 8 | 9 | select 10 | uuid()::varchar as log_id, 11 | '2023-10-01'::timestamp + interval 1 minute * (random() * 20000)::int as dt , 12 | (random() * 4)::int64 as user_id 13 | from generate_series(1, 10000) g(x) 14 | """ 15 | 16 | summary_of_logs_sql = """ 17 | {{ 18 | config( 19 | materialized='incremental', 20 | meta=dict(temp_schema_name='dbt_temp_test'), 21 | ) 22 | }} 23 | 24 | select dt::date as dt, user_id, count(1) as c 25 | from {{ ref('random_logs_test') }} 26 | 27 | 28 | {% if is_incremental() %} 29 | 30 | -- this filter will only be applied on an incremental run 31 | -- (uses > to include records whose timestamp occurred since the last run of this model) 32 | where dt > '2023-10-08'::timestamp 33 | 34 | {% endif %} 35 | group by all 36 | """ 37 | 38 | python_pyarrow_table_model = """ 39 | import pyarrow as pa 40 | 41 | def model(dbt, con): 42 | return pa.Table.from_pydict({"a": [1,2,3]}) 43 | """ 44 | 45 | @pytest.mark.skip_profile("buenavista", "file", "memory") 46 | class TestMDPluginAttach: 47 | @pytest.fixture(scope="class") 48 | def profiles_config_update(self, dbt_profile_target, test_database_name): 49 | md_config = {"token": dbt_profile_target.get("token")} 50 | plugins = [{"module": "motherduck", "config": md_config}] 51 | return { 52 | "test": { 53 | "outputs": { 54 | "dev": { 55 | "type": "duckdb", 56 | "path": ":memory:", 57 | "plugins": plugins, 58 | "attach": [ 59 | { 60 | 
"path": f"md:{test_database_name}", 61 | "type": "motherduck" 62 | } 63 | ] 64 | } 65 | }, 66 | "target": "dev", 67 | } 68 | } 69 | 70 | @pytest.fixture(scope="class") 71 | def models(self, md_sql): 72 | return { 73 | "md_table.sql": md_sql, 74 | "random_logs_test.sql": random_logs_sql, 75 | "summary_of_logs_test.sql": summary_of_logs_sql, 76 | "python_pyarrow_table_model.py": python_pyarrow_table_model, 77 | } 78 | 79 | @pytest.fixture(scope="class") 80 | def md_sql(self, test_database_name): 81 | # Reads from a MD database in my test account in the cloud 82 | return f""" 83 | select * FROM {test_database_name}.main.plugin_table 84 | """ 85 | 86 | @pytest.fixture(autouse=True) 87 | def run_dbt_scope(self, project, test_database_name): 88 | project.run_sql(f"CREATE OR REPLACE TABLE {test_database_name}.plugin_table (i integer, j string)") 89 | project.run_sql(f"INSERT INTO {test_database_name}.plugin_table (i, j) VALUES (1, 'foo')") 90 | yield 91 | project.run_sql("DROP VIEW IF EXISTS md_table") 92 | project.run_sql("DROP TABLE IF EXISTS random_logs_test") 93 | project.run_sql("DROP TABLE IF EXISTS summary_of_logs_test") 94 | project.run_sql(f"DROP TABLE IF EXISTS {test_database_name}.plugin_table") 95 | project.run_sql("DROP TABLE IF EXISTS python_pyarrow_table_model") 96 | 97 | def test_motherduck(self, project): 98 | run_dbt(expect_pass=True) 99 | 100 | 101 | @pytest.mark.skip_profile("buenavista", "file", "memory") 102 | class TestMDPluginAttachWithSettings(TestMDPluginAttach): 103 | @pytest.fixture(scope="class") 104 | def profiles_config_update(self, dbt_profile_target, test_database_name): 105 | md_setting = {"motherduck_token": dbt_profile_target.get("token")} 106 | return { 107 | "test": { 108 | "outputs": { 109 | "dev": { 110 | "type": "duckdb", 111 | "path": ":memory:", 112 | "attach": [ 113 | { 114 | "path": f"md:{test_database_name}", 115 | "type": "motherduck" 116 | } 117 | ], 118 | "settings": md_setting 119 | } 120 | }, 121 | "target": "dev", 122 | } 123 | } 124 | 125 | 126 | @pytest.mark.skip_profile("buenavista", "file", "memory") 127 | class TestMDPluginAttachWithTokenInPath(TestMDPluginAttach): 128 | @pytest.fixture(scope="class") 129 | def profiles_config_update(self, dbt_profile_target, test_database_name): 130 | token = dbt_profile_target.get("token") 131 | return { 132 | "test": { 133 | "outputs": { 134 | "dev": { 135 | "type": "duckdb", 136 | "path": ":memory:", 137 | "attach": [ 138 | { 139 | "path": f"md:{test_database_name}?motherduck_token={token}&user=1", 140 | "type": "motherduck" 141 | } 142 | ] 143 | } 144 | }, 145 | "target": "dev", 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_table_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test that table functions work as intended. 3 | See README for reasons to use this materialization approach! 
4 | """ 5 | import pytest 6 | 7 | from dbt.tests.util import ( 8 | run_dbt, 9 | check_result_nodes_by_name, 10 | relation_from_name 11 | ) 12 | 13 | seeds__example_seed_csv = """a,b,c 14 | 1,2,3 15 | 4,5,6 16 | 7,8,9 17 | """ 18 | 19 | models__example_table = """ 20 | {{ config(materialized='table') }} 21 | select * from {{ ref("seed") }} 22 | """ 23 | 24 | models__my_table_function = """ 25 | {{ config(materialized='table_function') }} 26 | select * from {{ ref("example_table") }} 27 | """ 28 | 29 | models__use_table_function = """ 30 | {{ config(materialized='table') }} 31 | select * from {{ ref("my_table_function") }}() 32 | """ 33 | 34 | models__my_table_function_1_param = """ 35 | {{ config(materialized='table_function', parameters='where_a') }} 36 | select * from {{ ref("example_table") }} 37 | where a = where_a 38 | """ 39 | 40 | models__use_table_function_1_param = """ 41 | {{ config(materialized='table') }} 42 | select * from {{ ref("my_table_function_1_param") }}(4) 43 | """ 44 | 45 | models__my_table_function_1_param_with_comma = """ 46 | {{ config(materialized='table_function', parameters='where_a, where_b') }} 47 | select * from {{ ref("example_table") }} 48 | where 1=1 49 | and a = where_a 50 | and b = where_b 51 | """ 52 | 53 | models__use_table_function_1_param_with_comma = """ 54 | {{ config(materialized='table') }} 55 | select * from {{ ref("my_table_function_1_param_with_comma") }}(4, 5) 56 | """ 57 | 58 | models__my_table_function_2_params = """ 59 | {{ config(materialized='table_function', parameters=['where_a', 'where_b']) }} 60 | select * from {{ ref("example_table") }} 61 | where 1=1 62 | and a = where_a 63 | and b = where_b 64 | """ 65 | 66 | models__use_table_function_2_params = """ 67 | {{ config(materialized='table') }} 68 | select * from {{ ref("my_table_function_2_params") }}(4, 5) 69 | """ 70 | 71 | # To test that the table function will work smoothly even if a column is added: 72 | # Create an example_table 73 | # create a table_function that is select * from example_table 74 | # Persist the output of that table function to a table 75 | # Alter the table to add a column 76 | # Persist the output of that table function to a new table (should include the new column) 77 | # Note this will not recreate the table_function (which would have been needed with a view) 78 | models__use_table_function_after_adding_column = """ 79 | -- depends_on: {{ ref('use_table_function') }} 80 | {{ config(materialized='table') }} 81 | {% set alter_table_query %} 82 | alter table {{ ref("example_table") }} add column d integer default 42 83 | {% endset %} 84 | 85 | {% set results = run_query(alter_table_query) %} 86 | select * from {{ ref("my_table_function") }}() 87 | """ 88 | 89 | 90 | 91 | @pytest.mark.skip_profile("buenavista") 92 | class TestTableFunction: 93 | 94 | @pytest.fixture(scope="class") 95 | def seeds(self): 96 | return { 97 | "seed.csv": seeds__example_seed_csv, 98 | } 99 | 100 | @pytest.fixture(scope="class") 101 | def models(self): 102 | return { 103 | "example_table.sql": models__example_table, 104 | "my_table_function.sql": models__my_table_function, 105 | "use_table_function.sql": models__use_table_function, 106 | "my_table_function_1_param.sql": models__my_table_function_1_param, 107 | "use_table_function_1_param.sql": models__use_table_function_1_param, 108 | "my_table_function_1_param_with_comma.sql": models__my_table_function_1_param_with_comma, 109 | "use_table_function_1_param_with_comma.sql": models__use_table_function_1_param_with_comma, 110 | 
"my_table_function_2_params.sql": models__my_table_function_2_params, 111 | "use_table_function_2_params.sql": models__use_table_function_2_params, 112 | "use_table_function_after_adding_column.sql": models__use_table_function_after_adding_column, 113 | } 114 | 115 | def test_base(self, project): 116 | # seed command 117 | results = run_dbt(["seed"]) 118 | assert len(results) == 1 119 | check_result_nodes_by_name(results, ["seed"]) 120 | 121 | results = run_dbt(["run"]) 122 | assert len(results) == 10 123 | check_result_nodes_by_name(results, [ 124 | "example_table", 125 | "my_table_function", 126 | "use_table_function", 127 | "my_table_function_1_param", 128 | "use_table_function_1_param", 129 | "my_table_function_1_param_with_comma", 130 | "use_table_function_1_param_with_comma", 131 | "my_table_function_2_params", 132 | "use_table_function_2_params", 133 | "use_table_function_after_adding_column" 134 | ]) 135 | 136 | relation_pre_alter = relation_from_name(project.adapter, "use_table_function") 137 | result_pre_alter = project.run_sql(f"describe {relation_pre_alter}", fetch="all") 138 | column_names_pre_alter = [row[0] for row in result_pre_alter] 139 | assert column_names_pre_alter == ['a', 'b', 'c'] 140 | 141 | relation_post_alter = relation_from_name(project.adapter, "use_table_function_after_adding_column") 142 | result_post_alter = project.run_sql(f"describe {relation_post_alter}", fetch="all") 143 | column_names_post_alter = [row[0] for row in result_post_alter] 144 | assert column_names_post_alter == ['a', 'b', 'c', 'd'] 145 | -------------------------------------------------------------------------------- /tests/unit/test_data_path_quoting.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.adapters.duckdb.credentials import Attachment 3 | 4 | 5 | class TestDataPathQuoting: 6 | """Test that data_path options are properly quoted in SQL generation.""" 7 | 8 | def test_data_path_s3_url_should_be_quoted(self): 9 | """Test that S3 URLs in data_path are properly quoted.""" 10 | attachment = Attachment( 11 | path="/tmp/test.db", 12 | options={"data_path": "s3://my-bucket/path"} 13 | ) 14 | sql = attachment.to_sql() 15 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 's3://my-bucket/path') 16 | assert "DATA_PATH 's3://my-bucket/path'" in sql 17 | 18 | def test_data_path_windows_path_should_be_quoted(self): 19 | """Test that Windows paths in data_path are properly quoted.""" 20 | attachment = Attachment( 21 | path="/tmp/test.db", 22 | options={"data_path": "C:\\Users\\test\\data"} 23 | ) 24 | sql = attachment.to_sql() 25 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 'C:\Users\test\data') 26 | assert "DATA_PATH 'C:\\Users\\test\\data'" in sql 27 | 28 | def test_data_path_unix_path_should_be_quoted(self): 29 | """Test that Unix paths in data_path are properly quoted.""" 30 | attachment = Attachment( 31 | path="/tmp/test.db", 32 | options={"data_path": "/home/user/data"} 33 | ) 34 | sql = attachment.to_sql() 35 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/home/user/data') 36 | assert "DATA_PATH '/home/user/data'" in sql 37 | 38 | def test_data_path_url_with_spaces_should_be_quoted(self): 39 | """Test that paths with spaces are properly quoted.""" 40 | attachment = Attachment( 41 | path="/tmp/test.db", 42 | options={"data_path": "/path/with spaces/data"} 43 | ) 44 | sql = attachment.to_sql() 45 | # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/path/with spaces/data') 46 | assert "DATA_PATH 
'/path/with spaces/data'" in sql 47 | 48 | def test_numeric_options_should_not_be_quoted(self): 49 | """Test that numeric options are not quoted.""" 50 | attachment = Attachment( 51 | path="/tmp/test.db", 52 | options={"timeout": 30000} 53 | ) 54 | sql = attachment.to_sql() 55 | # Should generate: ATTACH '/tmp/test.db' (TIMEOUT 30000) 56 | assert "TIMEOUT 30000" in sql 57 | assert "TIMEOUT '30000'" not in sql 58 | 59 | def test_boolean_options_work_correctly(self): 60 | """Test that boolean options work as expected.""" 61 | attachment = Attachment( 62 | path="/tmp/test.db", 63 | options={"use_cache": True, "skip_validation": False} 64 | ) 65 | sql = attachment.to_sql() 66 | # True booleans should appear as flag, False booleans should be omitted 67 | assert "USE_CACHE" in sql 68 | assert "SKIP_VALIDATION" not in sql 69 | 70 | def test_multiple_options_with_data_path(self): 71 | """Test multiple options including data_path.""" 72 | attachment = Attachment( 73 | path="/tmp/test.db", 74 | options={ 75 | "data_path": "s3://bucket/path", 76 | "timeout": 5000, 77 | "use_cache": True 78 | } 79 | ) 80 | sql = attachment.to_sql() 81 | assert "DATA_PATH 's3://bucket/path'" in sql 82 | assert "TIMEOUT 5000" in sql 83 | assert "USE_CACHE" in sql 84 | 85 | def test_already_single_quoted_strings_not_double_quoted(self): 86 | """Test that already single-quoted strings are not double-quoted.""" 87 | attachment = Attachment( 88 | path="/tmp/test.db", 89 | options={"data_path": "'s3://my-bucket/path'"} 90 | ) 91 | sql = attachment.to_sql() 92 | # Should keep existing single quotes, not add more 93 | assert "DATA_PATH 's3://my-bucket/path'" in sql 94 | assert "DATA_PATH ''s3://my-bucket/path''" not in sql 95 | 96 | def test_already_double_quoted_strings_preserved(self): 97 | """Test that already double-quoted strings are preserved.""" 98 | attachment = Attachment( 99 | path="/tmp/test.db", 100 | options={"data_path": '"s3://my-bucket/path"'} 101 | ) 102 | sql = attachment.to_sql() 103 | # Should keep existing double quotes 104 | assert 'DATA_PATH "s3://my-bucket/path"' in sql 105 | assert 'DATA_PATH \'"s3://my-bucket/path"\'' not in sql 106 | 107 | def test_quoted_strings_with_whitespace_preserved(self): 108 | """Test that quoted strings with surrounding whitespace are preserved.""" 109 | attachment = Attachment( 110 | path="/tmp/test.db", 111 | options={"data_path": " 's3://my-bucket/path' "} 112 | ) 113 | sql = attachment.to_sql() 114 | # Should detect quotes despite whitespace and preserve original value 115 | assert "DATA_PATH 's3://my-bucket/path' " in sql 116 | assert "DATA_PATH ' 's3://my-bucket/path' '" not in sql 117 | 118 | def test_quoted_strings_with_whitespace_double_quotes(self): 119 | """Test that double quoted strings with surrounding whitespace are preserved.""" 120 | attachment = Attachment( 121 | path="/tmp/test.db", 122 | options={"data_path": ' "s3://my-bucket/path" '} 123 | ) 124 | sql = attachment.to_sql() 125 | # Should detect quotes despite whitespace and preserve original value 126 | assert 'DATA_PATH "s3://my-bucket/path" ' in sql 127 | assert 'DATA_PATH \' "s3://my-bucket/path" \'' not in sql -------------------------------------------------------------------------------- /dbt/include/duckdb/macros/materializations/incremental_strategy/merge_config_validation.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_merge_config(config, target_relation=none) %} 2 | {%- set errors = [] -%} 3 | 4 | {%- set base_configuration_fields = 
{ 5 | 'merge_update_condition': 'string', 6 | 'merge_insert_condition': 'string', 7 | 'merge_on_using_columns': 'sequence', 8 | 'merge_update_columns': 'sequence', 9 | 'merge_update_set_expressions': 'mapping', 10 | 'merge_exclude_columns': 'sequence', 11 | 'merge_returning_columns': 'sequence' 12 | } -%} 13 | 14 | {%- for field_name, field_type in base_configuration_fields.items() -%} 15 | {%- set field_value = config.get(field_name) -%} 16 | {%- if field_type == 'string' -%} 17 | {%- do validate_string_field(field_value, field_name, errors) -%} 18 | {%- elif field_type == 'sequence' -%} 19 | {%- do validate_string_list_field(field_value, field_name, errors) -%} 20 | {%- elif field_type == 'mapping' -%} 21 | {%- do validate_dict_field(field_value, field_name, errors) -%} 22 | {%- endif -%} 23 | {%- endfor -%} 24 | 25 | {%- do validate_ducklake_restrictions(config, target_relation, errors) -%} 26 | 27 | {%- do validate_merge_clauses(config, base_configuration_fields, errors) -%} 28 | 29 | {%- if errors -%} 30 | {{ exceptions.raise_compiler_error("MERGE configuration errors:\n" ~ errors|join('\n')) }} 31 | {%- endif -%} 32 | {% endmacro %} 33 | 34 | 35 | {%- macro validate_merge_clauses(config, base_configuration_fields, errors) -%} 36 | {%- if config.get('merge_clauses') is not none -%} 37 | {%- if config.get('merge_clauses') is not mapping -%} 38 | {%- do errors.append("merge_clauses must be a dictionary, found: " ~ config.get('merge_clauses')) -%} 39 | {%- else -%} 40 | {%- set merge_clauses = config.get('merge_clauses') -%} 41 | {%- set clause_types = ['when_matched', 'when_not_matched'] -%} 42 | 43 | {%- set has_when_matched = 'when_matched' in merge_clauses -%} 44 | {%- set has_when_not_matched = 'when_not_matched' in merge_clauses -%} 45 | 46 | {%- if not has_when_matched and not has_when_not_matched -%} 47 | {%- do errors.append("merge_clauses must contain at least one of 'when_matched' or 'when_not_matched' keys") -%} 48 | {%- endif -%} 49 | 50 | {%- for clause_type in clause_types -%} 51 | {%- if clause_type in merge_clauses -%} 52 | {%- do validate_merge_clause_list(merge_clauses, clause_type, errors) -%} 53 | {%- endif -%} 54 | {%- endfor -%} 55 | 56 | {%- set conflicting_configs = [] -%} 57 | {%- for config_name, config_type in base_configuration_fields.items() -%} 58 | {%- if config_name not in ['merge_on_using_columns', 'merge_returning_columns'] -%} 59 | {%- set config_value = config.get(config_name) -%} 60 | {%- if config_value is not none -%} 61 | {%- if config_type == 'sequence' -%} 62 | {%- if config_value|length > 0 -%} 63 | {%- do conflicting_configs.append(config_name) -%} 64 | {%- endif -%} 65 | {%- elif config_type == 'mapping' -%} 66 | {%- if config_value.keys()|length > 0 -%} 67 | {%- do conflicting_configs.append(config_name) -%} 68 | {%- endif -%} 69 | {%- else -%} 70 | {%- do conflicting_configs.append(config_name) -%} 71 | {%- endif -%} 72 | {%- endif -%} 73 | {%- endif -%} 74 | {%- endfor -%} 75 | 76 | {%- if conflicting_configs|length > 0 -%} 77 | {%- do errors.append("When merge_clauses is specified, the following basic merge configurations will be ignored and should be removed: " ~ conflicting_configs|join(', ') ~ ". 
Define your merge behavior within merge_clauses instead.") -%} 78 | {%- endif -%} 79 | {%- endif -%} 80 | {%- endif -%} 81 | {%- endmacro -%} 82 | 83 | {%- macro validate_merge_clause_list(merge_clauses, clause_type, errors) -%} 84 | {%- if merge_clauses.get(clause_type) is not sequence or merge_clauses.get(clause_type) is mapping or merge_clauses.get(clause_type) is string -%} 85 | {%- do errors.append("merge_clauses." ~ clause_type ~ " must be a list") -%} 86 | {%- elif merge_clauses.get(clause_type)|length == 0 -%} 87 | {%- do errors.append("merge_clauses." ~ clause_type ~ " must contain at least one element") -%} 88 | {%- else -%} 89 | {%- for clause in merge_clauses.get(clause_type) -%} 90 | {%- if clause is not mapping -%} 91 | {%- do errors.append("merge_clauses." ~ clause_type ~ " elements must be dictionaries, found: " ~ clause) -%} 92 | {%- endif -%} 93 | {%- endfor -%} 94 | {%- endif -%} 95 | {%- endmacro -%} 96 | 97 | {%- macro validate_ducklake_restrictions(config, target_relation, errors) -%} 98 | {%- if target_relation and adapter.is_ducklake(target_relation) -%} 99 | {%- set merge_clauses = config.get('merge_clauses', {}) -%} 100 | {%- if merge_clauses and 'when_matched' in merge_clauses -%} 101 | {%- set when_matched_clauses = merge_clauses.get('when_matched', []) -%} 102 | {#- use a namespace: a plain set inside the for-loop below would not update a counter defined outside the loop because of Jinja's block scoping -#} {%- set ns = namespace(update_delete_count=0) -%} 103 | 104 | {%- for clause in when_matched_clauses -%} 105 | {%- if clause is mapping and clause.get('action') in ['update', 'delete'] -%} 106 | {%- set ns.update_delete_count = ns.update_delete_count + 1 -%} 107 | {%- endif -%} 108 | {%- endfor -%} 109 | 110 | {%- if ns.update_delete_count > 1 -%} 111 | {%- do errors.append("DuckLake MERGE restrictions: when_matched clauses can contain only a single UPDATE or DELETE action. Found " ~ ns.update_delete_count ~ " UPDATE/DELETE actions. DuckLake currently supports only one UPDATE or DELETE operation per MERGE statement.") -%} 112 | {%- endif -%} 113 | {%- endif -%} 114 | {%- endif -%} 115 | {%- endmacro -%} 116 | --------------------------------------------------------------------------------
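For orientation, a minimal sketch of a model configuration that the validation macros above would accept — this is not a file from the repository. The overall shape (a merge_clauses mapping whose when_matched / when_not_matched keys hold non-empty lists of clause dictionaries carrying an 'action' key) follows the checks in validate_merge_config and validate_merge_clause_list; the incremental strategy name, the unique_key, the specific actions shown, and the referenced model are illustrative assumptions rather than documented behavior.

{# hypothetical model, e.g. models/customers_merged.sql — shape assumed from the validators above #}
{{
  config(
    materialized='incremental',
    incremental_strategy='merge',
    unique_key='id',
    merge_clauses={
      'when_matched': [
        {'action': 'update'}
      ],
      'when_not_matched': [
        {'action': 'insert'}
      ]
    }
  )
}}
select * from {{ ref('stg_customers') }}

Because merge_clauses is supplied, none of the basic merge options (merge_update_columns, merge_update_condition, and so on) are set here; per validate_merge_clauses, providing both would raise a compiler error listing the conflicting keys.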