├── .gitignore ├── run.png ├── test.png ├── bigquery.png ├── polaris.png ├── sandbox.png ├── sno-bq.png ├── macros ├── export_metadata.sql ├── ref.sql ├── create.sql └── create_bigquery.sql ├── models └── example │ ├── schema.yml │ ├── bq │ ├── my_third_dbt_model.sql │ └── my_fourth_dbt_model.sql │ ├── my_first_dbt_model.sql │ └── my_second_dbt_model.sql ├── dbt_project.yml ├── profiles.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/run.png -------------------------------------------------------------------------------- /test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/test.png -------------------------------------------------------------------------------- /bigquery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/bigquery.png -------------------------------------------------------------------------------- /polaris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/polaris.png -------------------------------------------------------------------------------- /sandbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/sandbox.png -------------------------------------------------------------------------------- /sno-bq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/sno-bq.png -------------------------------------------------------------------------------- /macros/export_metadata.sql: -------------------------------------------------------------------------------- 1 | {% macro export_iceberg_metadata( relation) -%} 2 | EXPORT TABLE METADATA FROM {{relation}} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /models/example/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: my_second_dbt_model 6 | description: "A starter dbt model" 7 | columns: 8 | - name: L_RETURNFLAG 9 | description: "flag return" 10 | data_tests: 11 | - unique 12 | - not_null 13 | - name: my_fourth_dbt_model 14 | description: "A starter dbt model" 15 | columns: 16 | - name: word 17 | description: "flag return" 18 | data_tests: 19 | - unique 20 | - not_null 21 | -------------------------------------------------------------------------------- /models/example/bq/my_third_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Welcome to your first dbt model! 4 | Did you know that you can also configure models directly within SQL files? 
5 | This will override configurations stated in dbt_project.yml 6 | 7 | Try changing "table" to "view" below 8 | */ 9 | 10 | {{ config( 11 | materialized='ephemeral', 12 | ) 13 | }} 14 | 15 | with base as ( 16 | 17 | select * from `bigquery-public-data.samples.shakespeare` 18 | 19 | 20 | ) 21 | 22 | select 23 | * 24 | from base 25 | 26 | -------------------------------------------------------------------------------- /models/example/my_first_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Welcome to your first dbt model! 4 | Did you know that you can also configure models directly within SQL files? 5 | This will override configurations stated in dbt_project.yml 6 | 7 | Try changing "table" to "view" below 8 | */ 9 | 10 | {{ config( 11 | materialized='ephemeral', 12 | ) 13 | }} 14 | 15 | with base as ( 16 | 17 | select 18 | * 19 | from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.LINEITEM 20 | 21 | ) 22 | 23 | select 24 | * 25 | from base 26 | 27 | -------------------------------------------------------------------------------- /models/example/bq/my_fourth_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Use the `ref` function to select from other models 3 | 4 | {{ config( 5 | materialized="incremental", 6 | cluster_by=["word"], 7 | table_format="iceberg", 8 | post_hook=[export_iceberg_metadata(this)], 9 | base_location = var(), 10 | tags=["bigquery"] 11 | ) 12 | }} 13 | 14 | SELECT 15 | word, 16 | SUM(word_count) AS count 17 | FROM 18 | {{ ref('my_third_dbt_model') }} 19 | WHERE 20 | word LIKE "%raisin%" 21 | GROUP BY 22 | word 23 | -------------------------------------------------------------------------------- /models/example/my_second_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Use the `ref` function to select from other models 3 | 4 | {{ config( 5 | materialized='table', 6 | iceberg=true 7 | ) 8 | }} 9 | 10 | select 11 | l_returnflag, 12 | l_linestatus, 13 | sum(l_quantity) as sum_qty, 14 | sum(l_extendedprice) as sum_base_price, 15 | sum(l_extendedprice * (1-l_discount)) as sum_disc_price, 16 | sum(l_extendedprice * (1-l_discount) * (1+l_tax)) as sum_charge, 17 | avg(l_quantity) as avg_qty, 18 | avg(l_extendedprice) as avg_price, 19 | avg(l_discount) as avg_disc, 20 | count(*) as count_order 21 | from 22 | {{ ref('my_first_dbt_model') }} 23 | where 24 | l_shipdate <= dateadd(day, -90, to_date('1998-12-01')) 25 | group by 26 | l_returnflag, 27 | l_linestatus 28 | order by 29 | l_returnflag, 30 | l_linestatus 31 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'sandbox' 2 | 3 | # version from this yaml is loaded by packaging/setup.py during packaging 4 | version: '1.0.0' 5 | 6 | require-dbt-version: [">=0.21.0", "<=1.8.1"] 7 | 8 | config-version: 2 9 | 10 | profile: 'bq-oauth' 11 | 12 | target-path: "target" 13 | clean-targets: ["target", "dbt_modules"] 14 | macro-paths: ["macros"] 15 | log-path: "logs" 16 | test-paths: ["tests"] 17 | 18 | # config for snowflake, could be potentially move to a model base? 
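# These vars are consumed by the macro overrides in this repo:
#   catalog_iceberg + external_volume_iceberg -> CATALOG and EXTERNAL_VOLUME in macros/create.sql (Snowflake iceberg DDL)
#   storage_base_url_iceberg -> base path for iceberg_scan() in macros/ref.sql (duckdb target)
#   polaris_catalog_sync -> CATALOG_SYNC in macros/create.sql
#   bq_connection -> WITH CONNECTION in macros/create_bigquery.sql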
19 | 20 | vars: 21 | catalog_iceberg: 'SNOWFLAKE' 22 | external_volume_iceberg: "{{env_var('snowflake_external_volume')}}" 23 | storage_base_url_iceberg: "{{env_var('snowflake_url_s3_bucket')}}" 24 | polaris_catalog_sync: "{{env_var('polaris_catalog')}}" 25 | 26 | 27 | ## bigquery 28 | #biglake connection definition https://cloud.google.com/bigquery/docs/connections-api-intro 29 | bq_connection: "{{env_var('polaris_catalog')}}" 30 | -------------------------------------------------------------------------------- /macros/ref.sql: -------------------------------------------------------------------------------- 1 | {% macro ref(project_or_package, model_name) %} 2 | {% if target.name == 'duckdb'%} 3 | {# There should be way more checks here and all that#} 4 | {% set project_or_package, model_name = setup_project_and_model_name(project_or_package, model_name) %} 5 | {% set base_location = var("storage_base_url_iceberg") %} 6 | {% set iceberg_path = base_location ~ "/" ~ model_name %} 7 | {% set reference_to_iceberg = "iceberg_scan('" ~ iceberg_path ~ "')" %} 8 | {{ return (reference_to_iceberg) }} 9 | {% else %} 10 | {% if model_name is undefined %} 11 | {{ return (builtins.ref(project_or_package, **kwargs)) }} 12 | {% else %} 13 | {{ return (builtins.ref(project_or_package, model_name, **kwargs)) }} 14 | {% endif %} 15 | {% endif %} 16 | {% endmacro %} 17 | 18 | {% macro setup_project_and_model_name(project_or_package, model_name) %} 19 | {% set updated_project_or_package = project_or_package if model_name is defined else model.package_name %} 20 | {% set updated_model_name = model_name if model_name is defined else project_or_package %} 21 | {{ return((updated_project_or_package, updated_model_name)) }} 22 | {% endmacro %} -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | config: 2 | use_colors: True # change this to test if config is loading 3 | send_anonymous_usage_stats: False # can't be making external connections from internal systems 4 | printer_width: 160 # default 80 5 | 6 | bq-oauth: 7 | target: testing # default target 8 | outputs: 9 | duckdb: 10 | extensions: 11 | - iceberg 12 | type: duckdb 13 | path: localiceberg.duckdb 14 | settings: 15 | s3_region: eu-north-1 16 | s3_access_key_id: "{{env_var('s3_access_key_id')}}" 17 | s3_secret_access_key: "{{env_var('s3_secret_access_key')}}" 18 | snowflake: 19 | type: snowflake 20 | account: "{{env_var('snowflake_account')}}" 21 | user: "{{env_var('snowflake_user')}}" 22 | password: "{{env_var('snowflake_user_password')}}" 23 | role: "{{env_var('snowflake_user_role')}}" 24 | database: snowflakeinceberg_test 25 | warehouse: snowflakeinceberg_test_wh 26 | schema: public 27 | threads: 200 28 | bigquery: 29 | type: bigquery 30 | method: oauth 31 | project: "{{env_var('bigquery_project')}}" 32 | dataset: "{{env_var('bigquery_dataset')}}" 33 | threads: 4 # Must be a value of 1 or greater 34 | -------------------------------------------------------------------------------- /macros/create.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__create_table_as(temporary, relation, compiled_code, language='sql') -%} 2 | {%- set transient = config.get('transient', default=true) -%} 3 | {%- set iceberg = config.get('iceberg') -%} 4 | 5 | {% if temporary -%} 6 | {%- set table_type = "temporary" -%} 7 | {%- elif iceberg -%} 8 | {%- set table_type = "iceberg" -%} 9 | {%- 
elif transient -%} 10 | {%- set table_type = "transient" -%} 11 | {%- else -%} 12 | {%- set table_type = "" -%} 13 | {%- endif %} 14 | 15 | {%- if language == 'sql' -%} 16 | 17 | {%- set cluster_by_keys = config.get('cluster_by', default=none) -%} 18 | {%- set enable_automatic_clustering = config.get('automatic_clustering', default=false) -%} 19 | {%- set copy_grants = config.get('copy_grants', default=false) -%} 20 | {%- if cluster_by_keys is not none and cluster_by_keys is string -%} 21 | {%- set cluster_by_keys = [cluster_by_keys] -%} 22 | {%- endif -%} 23 | {%- if cluster_by_keys is not none -%} 24 | {%- set cluster_by_string = cluster_by_keys|join(", ")-%} 25 | {% else %} 26 | {%- set cluster_by_string = none -%} 27 | {%- endif -%} 28 | {%- set sql_header = config.get('sql_header', none) -%} 29 | 30 | {%- set catalog = var('catalog_iceberg') -%} 31 | {%- set external_volume = var('external_volume_iceberg') -%} 32 | {%- set polaris_catalog_sync = var('polaris_catalog_sync') -%} 33 | {%- set base_location = relation.identifier -%} 34 | 35 | 36 | {{ sql_header if sql_header is not none }} 37 | 38 | create or replace {{ table_type }} table {{ relation }} 39 | {%- set contract_config = config.get('contract') -%} 40 | {%- if contract_config.enforced -%} 41 | {{ get_assert_columns_equivalent(sql) }} 42 | {{ get_table_columns_and_constraints() }} 43 | {% set compiled_code = get_select_subquery(compiled_code) %} 44 | {% endif %} 45 | {% if iceberg and catalog and external_volume and base_location %} 46 | CATALOG="{{ catalog }}", 47 | EXTERNAL_VOLUME="{{ external_volume }}", 48 | BASE_LOCATION="{{ base_location }}" 49 | {% if polaris_catalog_sync %} 50 | CATALOG_SYNC = "{{ polaris_catalog_sync }}" 51 | {% endif %} 52 | {% endif %} 53 | {% if copy_grants and not temporary -%} copy grants {%- endif %} as 54 | ( 55 | {%- if cluster_by_string is not none -%} 56 | select * from ( 57 | {{ compiled_code }} 58 | ) order by ({{ cluster_by_string }}) 59 | {%- else -%} 60 | {{ compiled_code }} 61 | {%- endif %} 62 | ) 63 | 64 | ; 65 | {% if cluster_by_string is not none and not temporary -%} 66 | alter table {{relation}} cluster by ({{cluster_by_string}}); 67 | {%- endif -%} 68 | {% if enable_automatic_clustering and cluster_by_string is not none and not temporary -%} 69 | alter table {{relation}} resume recluster; 70 | {%- endif -%} 71 | 72 | {%- elif language == 'python' -%} 73 | {{ py_write_table(compiled_code=compiled_code, target_relation=relation, table_type=table_type) }} 74 | {%- else -%} 75 | {% do exceptions.raise_compiler_error("snowflake__create_table_as macro didn't get supported language, it got %s" % language) %} 76 | {%- endif -%} 77 | 78 | {% endmacro %} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multistack dbt and iceberg 2 | 3 | 4 | # Multistack with polaris 5 | ![image](polaris.png) 6 | 7 | # Multistack with bigquery 8 | ![image](bigquery.png) 9 | 10 | # Multistack with bigquery as main writer 11 | ![image](sno-bq.png) 12 | 13 | This is a proof of concept on how to use dbt with iceberg to run a multi-stack environment. 14 | 15 | ### running the models with snowflake creating iceberg tables 16 | ![image](run.png) 17 | ### running the tests with duckdb against the tables in iceberg 18 | ![image](test.png) 19 | This example uses Snowflake as the main engine to create iceberg tables and duckdb as a secondary engine to run tests. 
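Concretely, the project overrides `snowflake__create_table_as` (see `macros/create.sql`) so that models flagged with `iceberg=true` are created as Iceberg tables. As a rough sketch of what that macro renders for `my_second_dbt_model` — assuming the database/schema from `profiles.yml` and a placeholder external volume name — the DDL looks roughly like this:

```sql
create or replace iceberg table snowflakeinceberg_test.public.my_second_dbt_model
    CATALOG="SNOWFLAKE",
    EXTERNAL_VOLUME="my-external-volume-in-snowflake",
    BASE_LOCATION="my_second_dbt_model"
as (
    -- the model's compiled SELECT goes here (abridged)
    select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty
    from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.LINEITEM
    group by l_returnflag, l_linestatus
);
```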
20 | 21 | For now, Snowflake only offers a catalog where only Snowflake can write, but any other engine can read. With Polaris this will change, though! 22 | 23 | # Snowflake and iceberg :( 24 | 25 | With the new bundle 2024_05, Snowflake no longer generates the version-hint.text file ([bundle](https://docs.snowflake.com/en/release-notes/bcr-bundles/2024_05/bcr-1658)). 26 | You can temporarily disable this bundle to make this work with duckdb: 27 | ```sql 28 | SELECT SYSTEM$DISABLE_BEHAVIOR_CHANGE_BUNDLE('2024_05'); 29 | ``` 30 | 31 | # Extending dbt to work with Iceberg 32 | - Modify the create-table adapter for Snowflake to write in Iceberg format. 33 | - dbt dynamically compiles the path where Snowflake will write the data based on the name of the model. So, for example, `stg_customers` ends up written in the storage layer at `s3://mybucket/iceberg/stg_customers`. 34 | - duckdb currently only supports reading iceberg directly from folders, without any write operations. To enable this we also extended dbt's ref macro so that the compiled reference becomes an `iceberg_scan()` over the folder Snowflake wrote to. So at compilation time dbt rewrites the reference to `iceberg_scan('s3://mybucket/iceberg/stg_customers')` (see the compiled sketch in the Run dbt section below). 35 | - Because Snowflake writes the version-hint.text file, duckdb always reads the latest version of the data, and we can bypass the need for a catalog. 36 | - This is obviously not ideal, but for now it is the only option duckdb provides. 37 | 38 | 39 | Now, depending on the target, we can access exactly the same data through different engines: 40 | - With Snowflake we can run CREATE + SELECT operations 41 | - With duckdb we can run SELECT operations 42 | 43 | 44 | This is just a proof of concept that it's possible to have dbt working as a multi-stack transformation layer. Many of the decisions (and much of the code) could have been made differently. 45 | 46 | 47 | 48 | # How to get it running: 49 | 50 | * Create a bucket you can use externally (GCS, S3...) 51 | * [Configure an external volume in Snowflake](https://docs.snowflake.com/en/user-guide/tables-iceberg-configure-external-volume-s3). This will use the previous bucket to store the data. 52 | * Update the following vars in dbt_project.yml 53 | ```yml 54 | vars: 55 | catalog_iceberg: 'SNOWFLAKE' # keep as SNOWFLAKE if you want to use the Snowflake catalog 56 | external_volume_iceberg: 'my-external-volume-in-snowflake' # the volume name you just created in Snowflake 57 | storage_base_url_iceberg: 's3://my-bucket/iceberg' # base storage url duckdb reads from directly, since for now it can't access a catalog. This property could probably be set per model instead of globally. 58 | ``` 59 | 60 | * Configure your dbt profiles.yml by adding the credentials to connect to Snowflake and duckdb; there's an example in this project. 61 | * Profit 62 | 63 | # Extending dbt to work with Polaris 64 | If you want to use Polaris, [follow this tutorial to create a Polaris catalog integration](https://other-docs.snowflake.com/polaris/tutorials/polaris-gs#use-case-create-table-using-apache-spark). Then, update the following vars in your dbt_project.yml: 65 | ```yml 66 | vars: 67 | polaris_catalog_sync: 'polaris_catalog_sync' # the name you gave the catalog integration in `CREATE OR REPLACE CATALOG INTEGRATION` 68 | ``` 69 | 70 | # Run dbt 71 | 72 | You just have to choose the `--target`.
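Under the duckdb target, the overridden `ref` macro (see `macros/ref.sql`) swaps every reference for a direct `iceberg_scan()` over the path Snowflake wrote to, so a compiled test query ends up looking roughly like this — a sketch assuming the example bucket from above and the iceberg extension configured in `profiles.yml`:

```sql
-- roughly what a uniqueness test on my_second_dbt_model compiles to with --target duckdb
select l_returnflag, count(*) as n_records
from iceberg_scan('s3://my-bucket/iceberg/my_second_dbt_model')
where l_returnflag is not null
group by l_returnflag
having count(*) > 1;
```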
Remember that duckdb doesn't allow writing, so only tests can be run with that engine. 73 | 74 | * `dbt run --target snowflake` 75 | * `dbt test --target duckdb` -------------------------------------------------------------------------------- /macros/create_bigquery.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__create_table_as(temporary, relation, compiled_code, language='sql') -%} 2 | {%- if language == 'sql' -%} 3 | {%- set raw_partition_by = config.get('partition_by', none) -%} 4 | {%- set raw_cluster_by = config.get('cluster_by', none) -%} 5 | {%- set sql_header = config.get('sql_header', none) -%} 6 | {%- set table_format = config.get('table_format', 'default') -%} 7 | {%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%} 8 | {%- if partition_config.time_ingestion_partitioning -%} 9 | {%- set columns = get_columns_with_types_in_query_sql(sql) -%} 10 | {%- set table_dest_columns_csv = columns_without_partition_fields_csv(partition_config, columns) -%} 11 | {%- set columns = '(' ~ table_dest_columns_csv ~ ')' -%} 12 | {%- endif -%} 13 | 14 | {{ sql_header if sql_header is not none }} 15 | 16 | create or replace table {{ relation }} 17 | {%- set contract_config = config.get('contract') -%} 18 | {%- if contract_config.enforced -%} 19 | {{ get_assert_columns_equivalent(compiled_code) }} 20 | {{ get_table_columns_and_constraints() }} 21 | {%- set compiled_code = get_select_subquery(compiled_code) %} 22 | {% else %} 23 | {#-- cannot do contracts at the same time as time ingestion partitioning -#} 24 | {{ columns }} 25 | {% endif %} 26 | {%- if table_format == "iceberg" and partition_config is not none -%} 27 | {% do exceptions.raise_compiler_error("Partition by not available in iceberg tables, use cluster by instead") %} 28 | {#-- PARTITION BY cannot be used in iceberg -#} 29 | {%- else -%} 30 | {{ partition_by(partition_config) }} 31 | {% endif %} 32 | {{ cluster_by(raw_cluster_by) }} 33 | {% if table_format == "iceberg" %} 34 | {% set base_location = config.get('base_location') %} 35 | {%- if not base_location -%} 36 | {% do exceptions.raise_compiler_error("base_location not found") %} 37 | {% endif %} 38 | {% set sub_path = relation.identifier %} 39 | {% set bq_connection = var('bq_connection') %} 40 | {% set connection = "WITH CONNECTION `"~bq_connection~"`" %} 41 | {% set options %} 42 | OPTIONS( 43 | file_format = 'PARQUET', 44 | table_format = 'ICEBERG', 45 | storage_uri = '{{base_location}}/{{sub_path}}' 46 | ) 47 | {%- endset -%} 48 | {{- connection -}} 49 | {#-- pass this through {{ bigquery_table_options(config, model, temporary) }} -#} 50 | {{- options -}} 51 | 52 | {% endif %} 53 | 54 | 55 | {#-- PARTITION BY cannot be used with the AS query_statement clause. 56 | https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression 57 | -#} 58 | {%- if not partition_config.time_ingestion_partitioning %} 59 | as ( 60 | {{ compiled_code }} 61 | ); 62 | {%- endif %} 63 | {%- elif language == 'python' -%} 64 | {#-- 65 | N.B. Python models _can_ write to temp views HOWEVER they use a different session 66 | and have already expired by the time they need to be used (i.e. in merges for incremental models) 67 | 68 | TODO: Deep dive into spark sessions to see if we can reuse a single session for an entire 69 | dbt invocation.
70 | --#} 71 | 72 | {#-- when a user wants to change the schema of an existing relation, they must intentionally drop the table in the dataset --#} 73 | {%- set old_relation = adapter.get_relation(database=relation.database, schema=relation.schema, identifier=relation.identifier) -%} 74 | {%- if (old_relation.is_table and (should_full_refresh())) -%} 75 | {% do adapter.drop_relation(relation) %} 76 | {%- endif -%} 77 | {{ py_write_table(compiled_code=compiled_code, target_relation=relation.quote(database=False, schema=False, identifier=False)) }} 78 | {%- else -%} 79 | {% do exceptions.raise_compiler_error("bigquery__create_table_as macro didn't get supported language, it got %s" % language) %} 80 | {%- endif -%} 81 | 82 | {%- endmacro -%} 83 | 84 | {% macro bigquery__create_view_as(relation, sql) -%} 85 | {%- set sql_header = config.get('sql_header', none) -%} 86 | 87 | {{ sql_header if sql_header is not none }} 88 | 89 | create or replace view {{ relation }} 90 | {{ bigquery_view_options(config, model) }} 91 | {%- set contract_config = config.get('contract') -%} 92 | {%- if contract_config.enforced -%} 93 | {{ get_assert_columns_equivalent(sql) }} 94 | {%- endif %} 95 | as {{ sql }}; 96 | 97 | {% endmacro %} 98 | 99 | {% macro bigquery__drop_schema(relation) -%} 100 | {{ adapter.drop_schema(relation) }} 101 | {% endmacro %} 102 | 103 | {% macro bigquery__get_columns_in_relation(relation) -%} 104 | {{ return(adapter.get_columns_in_relation(relation)) }} 105 | {% endmacro %} 106 | 107 | 108 | {% macro bigquery__list_relations_without_caching(schema_relation) -%} 109 | {{ return(adapter.list_relations_without_caching(schema_relation)) }} 110 | {%- endmacro %} 111 | 112 | 113 | {% macro bigquery__list_schemas(database) -%} 114 | {{ return(adapter.list_schemas(database)) }} 115 | {% endmacro %} 116 | 117 | 118 | {% macro bigquery__check_schema_exists(information_schema, schema) %} 119 | {{ return(adapter.check_schema_exists(information_schema.database, schema)) }} 120 | {% endmacro %} 121 | 122 | {#-- relation-level macro is not implemented. This is handled in the CTAs statement #} 123 | {% macro bigquery__persist_docs(relation, model, for_relation, for_columns) -%} 124 | {% if for_columns and config.persist_column_docs() and model.columns %} 125 | {% do alter_column_comment(relation, model.columns) %} 126 | {% endif %} 127 | {% endmacro %} 128 | 129 | {% macro bigquery__alter_column_comment(relation, column_dict) -%} 130 | {% do adapter.update_columns(relation, column_dict) %} 131 | {% endmacro %} 132 | 133 | {% macro bigquery__alter_relation_add_columns(relation, add_columns) %} 134 | 135 | {% set sql -%} 136 | 137 | alter {{ relation.type }} {{ relation }} 138 | {% for column in add_columns %} 139 | add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }} 140 | {% endfor %} 141 | 142 | {%- endset -%} 143 | 144 | {{ return(run_query(sql)) }} 145 | 146 | {% endmacro %} 147 | 148 | {% macro bigquery__alter_relation_drop_columns(relation, drop_columns) %} 149 | 150 | {% set sql -%} 151 | 152 | alter {{ relation.type }} {{ relation }} 153 | 154 | {% for column in drop_columns %} 155 | drop column {{ column.name }}{{ ',' if not loop.last }} 156 | {% endfor %} 157 | 158 | {%- endset -%} 159 | 160 | {{ return(run_query(sql)) }} 161 | 162 | {% endmacro %} 163 | 164 | 165 | {% macro bigquery__alter_column_type(relation, column_name, new_column_type) -%} 166 | {#-- Changing a column's data type using a query requires you to scan the entire table. 
167 | The query charges can be significant if the table is very large. 168 | 169 | https://cloud.google.com/bigquery/docs/manually-changing-schemas#changing_a_columns_data_type 170 | #} 171 | {% set relation_columns = get_columns_in_relation(relation) %} 172 | 173 | {% set sql %} 174 | select 175 | {%- for col in relation_columns -%} 176 | {% if col.column == column_name %} 177 | CAST({{ col.quoted }} AS {{ new_column_type }}) AS {{ col.quoted }} 178 | {%- else %} 179 | {{ col.quoted }} 180 | {%- endif %} 181 | {%- if not loop.last %},{% endif -%} 182 | {%- endfor %} 183 | from {{ relation }} 184 | {% endset %} 185 | 186 | {% call statement('alter_column_type') %} 187 | {{ create_table_as(False, relation, sql)}} 188 | {%- endcall %} 189 | 190 | {% endmacro %} 191 | 192 | 193 | {% macro bigquery__test_unique(model, column_name) %} 194 | 195 | with dbt_test__target as ( 196 | 197 | select {{ column_name }} as unique_field 198 | from {{ model }} 199 | where {{ column_name }} is not null 200 | 201 | ) 202 | 203 | select 204 | unique_field, 205 | count(*) as n_records 206 | 207 | from dbt_test__target 208 | group by unique_field 209 | having count(*) > 1 210 | 211 | {% endmacro %} 212 | 213 | {% macro bigquery__upload_file(local_file_path, database, table_schema, table_name) %} 214 | 215 | {{ log("kwargs: " ~ kwargs) }} 216 | 217 | {% do adapter.upload_file(local_file_path, database, table_schema, table_name, kwargs=kwargs) %} 218 | 219 | {% endmacro %} 220 | --------------------------------------------------------------------------------
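For reference, when a model sets `table_format="iceberg"` (as `my_fourth_dbt_model` does), the `bigquery__create_table_as` override above renders DDL roughly along these lines — the project, dataset, BigLake connection, and bucket names below are placeholders, not values from this repo:

```sql
create or replace table `my-project.my_dataset.my_fourth_dbt_model`
cluster by word
WITH CONNECTION `my-project.eu.my-biglake-connection`
OPTIONS(
    file_format = 'PARQUET',
    table_format = 'ICEBERG',
    storage_uri = 'gs://my-bucket/iceberg/my_fourth_dbt_model'
)
as (
    -- the model's compiled SELECT goes here (abridged)
    select word, sum(word_count) as total_word_count
    from `bigquery-public-data.samples.shakespeare`
    where word like '%raisin%'
    group by word
);
```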