├── .gitignore ├── run.png ├── test.png ├── bigquery.png ├── polaris.png ├── sandbox.png ├── sno-bq.png ├── macros ├── export_metadata.sql ├── ref.sql ├── create.sql └── create_bigquery.sql ├── models └── example │ ├── schema.yml │ ├── bq │ ├── my_third_dbt_model.sql │ └── my_fourth_dbt_model.sql │ ├── my_first_dbt_model.sql │ └── my_second_dbt_model.sql ├── dbt_project.yml ├── profiles.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/run.png -------------------------------------------------------------------------------- /test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/test.png -------------------------------------------------------------------------------- /bigquery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/bigquery.png -------------------------------------------------------------------------------- /polaris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/polaris.png -------------------------------------------------------------------------------- /sandbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/sandbox.png -------------------------------------------------------------------------------- /sno-bq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borjavb/dbt-iceberg-poc/HEAD/sno-bq.png -------------------------------------------------------------------------------- /macros/export_metadata.sql: -------------------------------------------------------------------------------- 1 | {% macro export_iceberg_metadata( relation) -%} 2 | EXPORT TABLE METADATA FROM {{relation}} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /models/example/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: my_second_dbt_model 6 | description: "A starter dbt model" 7 | columns: 8 | - name: L_RETURNFLAG 9 | description: "flag return" 10 | data_tests: 11 | - unique 12 | - not_null 13 | - name: my_fourth_dbt_model 14 | description: "A starter dbt model" 15 | columns: 16 | - name: word 17 | description: "flag return" 18 | data_tests: 19 | - unique 20 | - not_null 21 | -------------------------------------------------------------------------------- /models/example/bq/my_third_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Welcome to your first dbt model! 4 | Did you know that you can also configure models directly within SQL files? 
5 | This will override configurations stated in dbt_project.yml 6 | 7 | Try changing "table" to "view" below 8 | */ 9 | 10 | {{ config( 11 | materialized='ephemeral', 12 | ) 13 | }} 14 | 15 | with base as ( 16 | 17 | select * from `bigquery-public-data.samples.shakespeare` 18 | 19 | 20 | ) 21 | 22 | select 23 | * 24 | from base 25 | 26 | -------------------------------------------------------------------------------- /models/example/my_first_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Welcome to your first dbt model! 4 | Did you know that you can also configure models directly within SQL files? 5 | This will override configurations stated in dbt_project.yml 6 | 7 | Try changing "table" to "view" below 8 | */ 9 | 10 | {{ config( 11 | materialized='ephemeral', 12 | ) 13 | }} 14 | 15 | with base as ( 16 | 17 | select 18 | * 19 | from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.LINEITEM 20 | 21 | ) 22 | 23 | select 24 | * 25 | from base 26 | 27 | -------------------------------------------------------------------------------- /models/example/bq/my_fourth_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Use the `ref` function to select from other models 3 | 4 | {{ config( 5 | materialized="incremental", 6 | cluster_by=["word"], 7 | table_format="iceberg", 8 | post_hook=[export_iceberg_metadata(this)], 9 | base_location = var(), 10 | tags=["bigquery"] 11 | ) 12 | }} 13 | 14 | SELECT 15 | word, 16 | SUM(word_count) AS count 17 | FROM 18 | {{ ref('my_third_dbt_model') }} 19 | WHERE 20 | word LIKE "%raisin%" 21 | GROUP BY 22 | word 23 | -------------------------------------------------------------------------------- /models/example/my_second_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Use the `ref` function to select from other models 3 | 4 | {{ config( 5 | materialized='table', 6 | iceberg=true 7 | ) 8 | }} 9 | 10 | select 11 | l_returnflag, 12 | l_linestatus, 13 | sum(l_quantity) as sum_qty, 14 | sum(l_extendedprice) as sum_base_price, 15 | sum(l_extendedprice * (1-l_discount)) as sum_disc_price, 16 | sum(l_extendedprice * (1-l_discount) * (1+l_tax)) as sum_charge, 17 | avg(l_quantity) as avg_qty, 18 | avg(l_extendedprice) as avg_price, 19 | avg(l_discount) as avg_disc, 20 | count(*) as count_order 21 | from 22 | {{ ref('my_first_dbt_model') }} 23 | where 24 | l_shipdate <= dateadd(day, -90, to_date('1998-12-01')) 25 | group by 26 | l_returnflag, 27 | l_linestatus 28 | order by 29 | l_returnflag, 30 | l_linestatus 31 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'sandbox' 2 | 3 | # version from this yaml is loaded by packaging/setup.py during packaging 4 | version: '1.0.0' 5 | 6 | require-dbt-version: [">=0.21.0", "<=1.8.1"] 7 | 8 | config-version: 2 9 | 10 | profile: 'bq-oauth' 11 | 12 | target-path: "target" 13 | clean-targets: ["target", "dbt_modules"] 14 | macro-paths: ["macros"] 15 | log-path: "logs" 16 | test-paths: ["tests"] 17 | 18 | # config for snowflake, could be potentially move to a model base? 
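# These vars are consumed by the macro overrides in this repo:
#   catalog_iceberg + external_volume_iceberg -> CATALOG and EXTERNAL_VOLUME in macros/create.sql (Snowflake iceberg DDL)
#   storage_base_url_iceberg -> base path for iceberg_scan() in macros/ref.sql (duckdb target)
#   polaris_catalog_sync -> CATALOG_SYNC in macros/create.sql
#   bq_connection -> WITH CONNECTION in macros/create_bigquery.sql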
19 | 20 | vars: 21 | catalog_iceberg: 'SNOWFLAKE' 22 | external_volume_iceberg: "{{env_var('snowflake_external_volume')}}" 23 | storage_base_url_iceberg: "{{env_var('snowflake_url_s3_bucket')}}" 24 | polaris_catalog_sync: "{{env_var('polaris_catalog')}}" 25 | 26 | 27 | ## bigquery 28 | #biglake connection definition https://cloud.google.com/bigquery/docs/connections-api-intro 29 | bq_connection: "{{env_var('polaris_catalog')}}" 30 | -------------------------------------------------------------------------------- /macros/ref.sql: -------------------------------------------------------------------------------- 1 | {% macro ref(project_or_package, model_name) %} 2 | {% if target.name == 'duckdb'%} 3 | {# There should be way more checks here and all that#} 4 | {% set project_or_package, model_name = setup_project_and_model_name(project_or_package, model_name) %} 5 | {% set base_location = var("storage_base_url_iceberg") %} 6 | {% set iceberg_path = base_location ~ "/" ~ model_name %} 7 | {% set reference_to_iceberg = "iceberg_scan('" ~ iceberg_path ~ "')" %} 8 | {{ return (reference_to_iceberg) }} 9 | {% else %} 10 | {% if model_name is undefined %} 11 | {{ return (builtins.ref(project_or_package, **kwargs)) }} 12 | {% else %} 13 | {{ return (builtins.ref(project_or_package, model_name, **kwargs)) }} 14 | {% endif %} 15 | {% endif %} 16 | {% endmacro %} 17 | 18 | {% macro setup_project_and_model_name(project_or_package, model_name) %} 19 | {% set updated_project_or_package = project_or_package if model_name is defined else model.package_name %} 20 | {% set updated_model_name = model_name if model_name is defined else project_or_package %} 21 | {{ return((updated_project_or_package, updated_model_name)) }} 22 | {% endmacro %} -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | config: 2 | use_colors: True # change this to test if config is loading 3 | send_anonymous_usage_stats: False # can't be making external connections from internal systems 4 | printer_width: 160 # default 80 5 | 6 | bq-oauth: 7 | target: testing # default target 8 | outputs: 9 | duckdb: 10 | extensions: 11 | - iceberg 12 | type: duckdb 13 | path: localiceberg.duckdb 14 | settings: 15 | s3_region: eu-north-1 16 | s3_access_key_id: "{{env_var('s3_access_key_id')}}" 17 | s3_secret_access_key: "{{env_var('s3_secret_access_key')}}" 18 | snowflake: 19 | type: snowflake 20 | account: "{{env_var('snowflake_account')}}" 21 | user: "{{env_var('snowflake_user')}}" 22 | password: "{{env_var('snowflake_user_password')}}" 23 | role: "{{env_var('snowflake_user_role')}}" 24 | database: snowflakeinceberg_test 25 | warehouse: snowflakeinceberg_test_wh 26 | schema: public 27 | threads: 200 28 | bigquery: 29 | type: bigquery 30 | method: oauth 31 | project: "{{env_var('bigquery_project')}}" 32 | dataset: "{{env_var('bigquery_dataset')}}" 33 | threads: 4 # Must be a value of 1 or greater 34 | -------------------------------------------------------------------------------- /macros/create.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__create_table_as(temporary, relation, compiled_code, language='sql') -%} 2 | {%- set transient = config.get('transient', default=true) -%} 3 | {%- set iceberg = config.get('iceberg') -%} 4 | 5 | {% if temporary -%} 6 | {%- set table_type = "temporary" -%} 7 | {%- elif iceberg -%} 8 | {%- set table_type = "iceberg" -%} 9 | {%- 
elif transient -%} 10 | {%- set table_type = "transient" -%} 11 | {%- else -%} 12 | {%- set table_type = "" -%} 13 | {%- endif %} 14 | 15 | {%- if language == 'sql' -%} 16 | 17 | {%- set cluster_by_keys = config.get('cluster_by', default=none) -%} 18 | {%- set enable_automatic_clustering = config.get('automatic_clustering', default=false) -%} 19 | {%- set copy_grants = config.get('copy_grants', default=false) -%} 20 | {%- if cluster_by_keys is not none and cluster_by_keys is string -%} 21 | {%- set cluster_by_keys = [cluster_by_keys] -%} 22 | {%- endif -%} 23 | {%- if cluster_by_keys is not none -%} 24 | {%- set cluster_by_string = cluster_by_keys|join(", ")-%} 25 | {% else %} 26 | {%- set cluster_by_string = none -%} 27 | {%- endif -%} 28 | {%- set sql_header = config.get('sql_header', none) -%} 29 | 30 | {%- set catalog = var('catalog_iceberg') -%} 31 | {%- set external_volume = var('external_volume_iceberg') -%} 32 | {%- set polaris_catalog_sync = var('polaris_catalog_sync') -%} 33 | {%- set base_location = relation.identifier -%} 34 | 35 | 36 | {{ sql_header if sql_header is not none }} 37 | 38 | create or replace {{ table_type }} table {{ relation }} 39 | {%- set contract_config = config.get('contract') -%} 40 | {%- if contract_config.enforced -%} 41 | {{ get_assert_columns_equivalent(sql) }} 42 | {{ get_table_columns_and_constraints() }} 43 | {% set compiled_code = get_select_subquery(compiled_code) %} 44 | {% endif %} 45 | {% if iceberg and catalog and external_volume and base_location %} 46 | CATALOG="{{ catalog }}", 47 | EXTERNAL_VOLUME="{{ external_volume }}", 48 | BASE_LOCATION="{{ base_location }}" 49 | {% if polaris_catalog_sync %} 50 | CATALOG_SYNC = "{{ polaris_catalog_sync }}" 51 | {% endif %} 52 | {% endif %} 53 | {% if copy_grants and not temporary -%} copy grants {%- endif %} as 54 | ( 55 | {%- if cluster_by_string is not none -%} 56 | select * from ( 57 | {{ compiled_code }} 58 | ) order by ({{ cluster_by_string }}) 59 | {%- else -%} 60 | {{ compiled_code }} 61 | {%- endif %} 62 | ) 63 | 64 | ; 65 | {% if cluster_by_string is not none and not temporary -%} 66 | alter table {{relation}} cluster by ({{cluster_by_string}}); 67 | {%- endif -%} 68 | {% if enable_automatic_clustering and cluster_by_string is not none and not temporary -%} 69 | alter table {{relation}} resume recluster; 70 | {%- endif -%} 71 | 72 | {%- elif language == 'python' -%} 73 | {{ py_write_table(compiled_code=compiled_code, target_relation=relation, table_type=table_type) }} 74 | {%- else -%} 75 | {% do exceptions.raise_compiler_error("snowflake__create_table_as macro didn't get supported language, it got %s" % language) %} 76 | {%- endif -%} 77 | 78 | {% endmacro %} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multistack dbt and iceberg 2 | 3 | 4 | # Multistack with polaris 5 | ![image](polaris.png) 6 | 7 | # Multistack with bigquery 8 | ![image](bigquery.png) 9 | 10 | # Multistack with bigquery as main writer 11 | ![image](sno-bq.png) 12 | 13 | This is a proof of concept on how to use dbt with iceberg to run a multi-stack environment. 14 | 15 | ### running the models with snowflake creating iceberg tables 16 | ![image](run.png) 17 | ### running the tests with duckdb against the tables in iceberg 18 | ![image](test.png) 19 | This example uses Snowflake as the main engine to create iceberg tables and duckdb as a secondary engine to run tests. 
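Concretely, the project overrides `snowflake__create_table_as` (see `macros/create.sql`) so that models flagged with `iceberg=true` are created as Iceberg tables. As a rough sketch of what that macro renders for `my_second_dbt_model` — assuming the database/schema from `profiles.yml` and a placeholder external volume name — the DDL looks roughly like this:

```sql
create or replace iceberg table snowflakeinceberg_test.public.my_second_dbt_model
    CATALOG="SNOWFLAKE",
    EXTERNAL_VOLUME="my-external-volume-in-snowflake",
    BASE_LOCATION="my_second_dbt_model"
as (
    -- the model's compiled SELECT goes here (abridged)
    select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty
    from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.LINEITEM
    group by l_returnflag, l_linestatus
);
```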
20 | 21 | For now, Snowflake only offers a catalog where only Snowflake can write, but any other engine can read. With Polaris this will change, though! 22 | 23 | # Snowflake and iceberg :( 24 | 25 | With the new bundle 2024_05, Snowflake no longer generates the version-hint.text file ([bundle](https://docs.snowflake.com/en/release-notes/bcr-bundles/2024_05/bcr-1658)). 26 | You can temporarily disable this bundle to make this work with duckdb: 27 | ```sql 28 | SELECT SYSTEM$DISABLE_BEHAVIOR_CHANGE_BUNDLE('2024_05'); 29 | ``` 30 | 31 | # Extending dbt to work with Iceberg 32 | - Modify the create-table adapter for Snowflake to write in Iceberg format. 33 | - dbt dynamically compiles the path where Snowflake will write the data based on the name of the model. So, for example, `stg_customers` ends up written in the storage layer at `s3://mybucket/iceberg/stg_customers`. 34 | - duckdb currently only supports reading iceberg directly from folders, without any write operations. To enable this we also extended dbt's ref macro so that the compiled reference becomes an `iceberg_scan()` over the folder Snowflake wrote to. So at compilation time dbt rewrites the reference to `iceberg_scan('s3://mybucket/iceberg/stg_customers')` (see the compiled sketch in the Run dbt section below). 35 | - Because Snowflake writes the version-hint.text file, duckdb always reads the latest version of the data, and we can bypass the need for a catalog. 36 | - This is obviously not ideal, but for now it is the only option duckdb provides. 37 | 38 | 39 | Now, depending on the target, we can access exactly the same data through different engines: 40 | - With Snowflake we can run CREATE + SELECT operations 41 | - With duckdb we can run SELECT operations 42 | 43 | 44 | This is just a proof of concept that it's possible to have dbt working as a multi-stack transformation layer. Many of the decisions (and much of the code) could have been made differently. 45 | 46 | 47 | 48 | # How to get it running: 49 | 50 | * Create a bucket you can use externally (GCS, S3...) 51 | * [Configure an external volume in Snowflake](https://docs.snowflake.com/en/user-guide/tables-iceberg-configure-external-volume-s3). This will use the previous bucket to store the data. 52 | * Update the following vars in dbt_project.yml 53 | ```yml 54 | vars: 55 | catalog_iceberg: 'SNOWFLAKE' # keep as SNOWFLAKE if you want to use the Snowflake catalog 56 | external_volume_iceberg: 'my-external-volume-in-snowflake' # the volume name you just created in Snowflake 57 | storage_base_url_iceberg: 's3://my-bucket/iceberg' # base storage url duckdb reads from directly, since for now it can't access a catalog. This property could probably be set per model instead of globally. 58 | ``` 59 | 60 | * Configure your dbt profiles.yml by adding the credentials to connect to Snowflake and duckdb; there's an example in this project. 61 | * Profit 62 | 63 | # Extending dbt to work with Polaris 64 | If you want to use Polaris, [follow this tutorial to create a Polaris catalog integration](https://other-docs.snowflake.com/polaris/tutorials/polaris-gs#use-case-create-table-using-apache-spark). Then, update the following vars in your dbt_project.yml: 65 | ```yml 66 | vars: 67 | polaris_catalog_sync: 'polaris_catalog_sync' # the name you gave the catalog integration in `CREATE OR REPLACE CATALOG INTEGRATION` 68 | ``` 69 | 70 | # Run dbt 71 | 72 | You just have to choose the `--target`.
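Under the duckdb target, the overridden `ref` macro (see `macros/ref.sql`) swaps every reference for a direct `iceberg_scan()` over the path Snowflake wrote to, so a compiled test query ends up looking roughly like this — a sketch assuming the example bucket from above and the iceberg extension configured in `profiles.yml`:

```sql
-- roughly what a uniqueness test on my_second_dbt_model compiles to with --target duckdb
select l_returnflag, count(*) as n_records
from iceberg_scan('s3://my-bucket/iceberg/my_second_dbt_model')
where l_returnflag is not null
group by l_returnflag
having count(*) > 1;
```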
Remember that duckdb doesn't allow writing, so only tests can be run with that engine. 73 | 74 | * `dbt run --target snowflake` 75 | * `dbt test --target duckdb` -------------------------------------------------------------------------------- /macros/create_bigquery.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__create_table_as(temporary, relation, compiled_code, language='sql') -%} 2 | {%- if language == 'sql' -%} 3 | {%- set raw_partition_by = config.get('partition_by', none) -%} 4 | {%- set raw_cluster_by = config.get('cluster_by', none) -%} 5 | {%- set sql_header = config.get('sql_header', none) -%} 6 | {%- set table_format = config.get('table_format', 'default') -%} 7 | {%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%} 8 | {%- if partition_config.time_ingestion_partitioning -%} 9 | {%- set columns = get_columns_with_types_in_query_sql(sql) -%} 10 | {%- set table_dest_columns_csv = columns_without_partition_fields_csv(partition_config, columns) -%} 11 | {%- set columns = '(' ~ table_dest_columns_csv ~ ')' -%} 12 | {%- endif -%} 13 | 14 | {{ sql_header if sql_header is not none }} 15 | 16 | create or replace table {{ relation }} 17 | {%- set contract_config = config.get('contract') -%} 18 | {%- if contract_config.enforced -%} 19 | {{ get_assert_columns_equivalent(compiled_code) }} 20 | {{ get_table_columns_and_constraints() }} 21 | {%- set compiled_code = get_select_subquery(compiled_code) %} 22 | {% else %} 23 | {#-- cannot do contracts at the same time as time ingestion partitioning -#} 24 | {{ columns }} 25 | {% endif %} 26 | {%- if table_format == "iceberg" and partition_config is not none -%} 27 | {% do exceptions.raise_compiler_error("Partition by not available in iceberg tables, use cluster by instead") %} 28 | {#-- PARTITION BY cannot be used in iceberg -#} 29 | {%- else -%} 30 | {{ partition_by(partition_config) }} 31 | {% endif %} 32 | {{ cluster_by(raw_cluster_by) }} 33 | {% if table_format == "iceberg" %} 34 | {% set base_location = config.get('base_location') %} 35 | {%- if not base_location -%} 36 | {% do exceptions.raise_compiler_error("base_location not found") %} 37 | {% endif %} 38 | {% set sub_path = relation.identifier %} 39 | {% set bq_connection = var('bq_connection') %} 40 | {% set connection = "WITH CONNECTION `"~bq_connection~"`" %} 41 | {% set options %} 42 | OPTIONS( 43 | file_format = 'PARQUET', 44 | table_format = 'ICEBERG', 45 | storage_uri = '{{base_location}}/{{sub_path}}' 46 | ) 47 | {%- endset -%} 48 | {{- connection -}} 49 | {#-- pass this through {{ bigquery_table_options(config, model, temporary) }} -#} 50 | {{- options -}} 51 | 52 | {% endif %} 53 | 54 | 55 | {#-- PARTITION BY cannot be used with the AS query_statement clause. 56 | https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression 57 | -#} 58 | {%- if not partition_config.time_ingestion_partitioning %} 59 | as ( 60 | {{ compiled_code }} 61 | ); 62 | {%- endif %} 63 | {%- elif language == 'python' -%} 64 | {#-- 65 | N.B. Python models _can_ write to temp views HOWEVER they use a different session 66 | and have already expired by the time they need to be used (i.e. in merges for incremental models) 67 | 68 | TODO: Deep dive into spark sessions to see if we can reuse a single session for an entire 69 | dbt invocation.
70 | --#} 71 | 72 | {#-- when a user wants to change the schema of an existing relation, they must intentionally drop the table in the dataset --#} 73 | {%- set old_relation = adapter.get_relation(database=relation.database, schema=relation.schema, identifier=relation.identifier) -%} 74 | {%- if (old_relation.is_table and (should_full_refresh())) -%} 75 | {% do adapter.drop_relation(relation) %} 76 | {%- endif -%} 77 | {{ py_write_table(compiled_code=compiled_code, target_relation=relation.quote(database=False, schema=False, identifier=False)) }} 78 | {%- else -%} 79 | {% do exceptions.raise_compiler_error("bigquery__create_table_as macro didn't get supported language, it got %s" % language) %} 80 | {%- endif -%} 81 | 82 | {%- endmacro -%} 83 | 84 | {% macro bigquery__create_view_as(relation, sql) -%} 85 | {%- set sql_header = config.get('sql_header', none) -%} 86 | 87 | {{ sql_header if sql_header is not none }} 88 | 89 | create or replace view {{ relation }} 90 | {{ bigquery_view_options(config, model) }} 91 | {%- set contract_config = config.get('contract') -%} 92 | {%- if contract_config.enforced -%} 93 | {{ get_assert_columns_equivalent(sql) }} 94 | {%- endif %} 95 | as {{ sql }}; 96 | 97 | {% endmacro %} 98 | 99 | {% macro bigquery__drop_schema(relation) -%} 100 | {{ adapter.drop_schema(relation) }} 101 | {% endmacro %} 102 | 103 | {% macro bigquery__get_columns_in_relation(relation) -%} 104 | {{ return(adapter.get_columns_in_relation(relation)) }} 105 | {% endmacro %} 106 | 107 | 108 | {% macro bigquery__list_relations_without_caching(schema_relation) -%} 109 | {{ return(adapter.list_relations_without_caching(schema_relation)) }} 110 | {%- endmacro %} 111 | 112 | 113 | {% macro bigquery__list_schemas(database) -%} 114 | {{ return(adapter.list_schemas(database)) }} 115 | {% endmacro %} 116 | 117 | 118 | {% macro bigquery__check_schema_exists(information_schema, schema) %} 119 | {{ return(adapter.check_schema_exists(information_schema.database, schema)) }} 120 | {% endmacro %} 121 | 122 | {#-- relation-level macro is not implemented. This is handled in the CTAs statement #} 123 | {% macro bigquery__persist_docs(relation, model, for_relation, for_columns) -%} 124 | {% if for_columns and config.persist_column_docs() and model.columns %} 125 | {% do alter_column_comment(relation, model.columns) %} 126 | {% endif %} 127 | {% endmacro %} 128 | 129 | {% macro bigquery__alter_column_comment(relation, column_dict) -%} 130 | {% do adapter.update_columns(relation, column_dict) %} 131 | {% endmacro %} 132 | 133 | {% macro bigquery__alter_relation_add_columns(relation, add_columns) %} 134 | 135 | {% set sql -%} 136 | 137 | alter {{ relation.type }} {{ relation }} 138 | {% for column in add_columns %} 139 | add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }} 140 | {% endfor %} 141 | 142 | {%- endset -%} 143 | 144 | {{ return(run_query(sql)) }} 145 | 146 | {% endmacro %} 147 | 148 | {% macro bigquery__alter_relation_drop_columns(relation, drop_columns) %} 149 | 150 | {% set sql -%} 151 | 152 | alter {{ relation.type }} {{ relation }} 153 | 154 | {% for column in drop_columns %} 155 | drop column {{ column.name }}{{ ',' if not loop.last }} 156 | {% endfor %} 157 | 158 | {%- endset -%} 159 | 160 | {{ return(run_query(sql)) }} 161 | 162 | {% endmacro %} 163 | 164 | 165 | {% macro bigquery__alter_column_type(relation, column_name, new_column_type) -%} 166 | {#-- Changing a column's data type using a query requires you to scan the entire table. 
167 | The query charges can be significant if the table is very large. 168 | 169 | https://cloud.google.com/bigquery/docs/manually-changing-schemas#changing_a_columns_data_type 170 | #} 171 | {% set relation_columns = get_columns_in_relation(relation) %} 172 | 173 | {% set sql %} 174 | select 175 | {%- for col in relation_columns -%} 176 | {% if col.column == column_name %} 177 | CAST({{ col.quoted }} AS {{ new_column_type }}) AS {{ col.quoted }} 178 | {%- else %} 179 | {{ col.quoted }} 180 | {%- endif %} 181 | {%- if not loop.last %},{% endif -%} 182 | {%- endfor %} 183 | from {{ relation }} 184 | {% endset %} 185 | 186 | {% call statement('alter_column_type') %} 187 | {{ create_table_as(False, relation, sql)}} 188 | {%- endcall %} 189 | 190 | {% endmacro %} 191 | 192 | 193 | {% macro bigquery__test_unique(model, column_name) %} 194 | 195 | with dbt_test__target as ( 196 | 197 | select {{ column_name }} as unique_field 198 | from {{ model }} 199 | where {{ column_name }} is not null 200 | 201 | ) 202 | 203 | select 204 | unique_field, 205 | count(*) as n_records 206 | 207 | from dbt_test__target 208 | group by unique_field 209 | having count(*) > 1 210 | 211 | {% endmacro %} 212 | 213 | {% macro bigquery__upload_file(local_file_path, database, table_schema, table_name) %} 214 | 215 | {{ log("kwargs: " ~ kwargs) }} 216 | 217 | {% do adapter.upload_file(local_file_path, database, table_schema, table_name, kwargs=kwargs) %} 218 | 219 | {% endmacro %} 220 | --------------------------------------------------------------------------------
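For reference, when a model sets `table_format="iceberg"` (as `my_fourth_dbt_model` does), the `bigquery__create_table_as` override above renders DDL roughly along these lines — the project, dataset, BigLake connection, and bucket names below are placeholders, not values from this repo:

```sql
create or replace table `my-project.my_dataset.my_fourth_dbt_model`
cluster by word
WITH CONNECTION `my-project.eu.my-biglake-connection`
OPTIONS(
    file_format = 'PARQUET',
    table_format = 'ICEBERG',
    storage_uri = 'gs://my-bucket/iceberg/my_fourth_dbt_model'
)
as (
    -- the model's compiled SELECT goes here (abridged)
    select word, sum(word_count) as total_word_count
    from `bigquery-public-data.samples.shakespeare`
    where word like '%raisin%'
    group by word
);
```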