├── CHANGELOG.md ├── docker ├── trino │ ├── etc │ │ ├── node.properties │ │ ├── config.properties │ │ └── jvm.config │ └── catalog │ │ └── memory.properties ├── starburst │ ├── etc │ │ ├── node.properties │ │ ├── config.properties │ │ └── jvm.config │ └── catalog │ │ └── memory.properties ├── dbt │ ├── build.sh │ ├── profiles.yml │ └── Dockerfile ├── remove_trino.bash ├── remove_starburst.bash ├── init_trino.bash ├── init_starburst.bash ├── run_dbt_date_integration_tests.bash ├── run_dbt_metrics_integration_tests.bash └── run_dbt_utils_integration_tests.bash ├── .gitignore ├── integration_tests ├── dbt_project.yml ├── dbt_utils │ ├── data │ │ └── sql │ │ │ ├── data_deduplicate_expected_trino.csv │ │ │ └── data_deduplicate_trino.csv │ ├── macros │ │ └── limit_zero.sql │ ├── packages.yml │ ├── models │ │ └── sql │ │ │ ├── schema.yml │ │ │ └── test_deduplicate_trino.sql │ └── dbt_project.yml ├── dbt_metrics │ ├── packages.yml │ ├── models │ │ ├── materialized_models │ │ │ └── trino__fact_orders.sql │ │ ├── metric_definitions │ │ │ ├── base_count_metric.yml │ │ │ ├── case_when_metric.yml │ │ │ ├── base_average_metric.yml │ │ │ ├── base_median_metric.yml │ │ │ ├── base_count_distinct_metric.yml │ │ │ └── base_sum_metric.yml │ │ ├── metric_testing_models │ │ │ ├── trino__simple_develop_metric.sql │ │ │ └── trino__develop_metric.sql │ │ └── trino__custom_calendar.sql │ ├── seeds │ │ └── source │ │ │ └── trino__fact_orders_source.csv │ └── dbt_project.yml ├── dbt_date │ ├── packages.yml │ └── dbt_project.yml └── ci │ └── sample.profiles.yml ├── dbt_project.yml ├── macros ├── dbt_date │ └── convert_timezone.sql ├── dbt_utils │ ├── schema_cleanup │ │ ├── drop_schema_by_name.sql │ │ ├── drop_schemas_by_prefixes.sql │ │ └── drop_old_relations.sql │ ├── sql │ │ ├── get_tables_by_pattern_sql.sql │ │ ├── deduplicate.sql │ │ └── date_spine.sql │ └── generic_tests │ │ ├── not_null_proportion.sql │ │ ├── equal_rowcount.sql │ │ └── fewer_rows_than.sql └── dbt_metrics │ └── sql_gen │ 
└── gen_calendar_join.sql ├── .gitmodules ├── docker-compose-trino.yml ├── docker-compose-starburst.yml ├── Makefile ├── .github └── workflows │ └── ci.yml ├── README.md └── LICENSE /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker/trino/etc/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=docker 2 | node.data-dir=/data/trino 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /**/target/ 2 | /**/dbt_modules/ 3 | /**/dbt_packages/ 4 | /**/logs/ 5 | /**/env/ 6 | -------------------------------------------------------------------------------- /docker/starburst/etc/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=docker 2 | node.data-dir=/data/starburst 3 | -------------------------------------------------------------------------------- /docker/dbt/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build . 
-f docker/dbt/Dockerfile -t dbt-trino-utils 4 | -------------------------------------------------------------------------------- /docker/starburst/catalog/memory.properties: -------------------------------------------------------------------------------- 1 | connector.name=memory 2 | memory.max-data-per-node=128MB 3 | -------------------------------------------------------------------------------- /docker/trino/catalog/memory.properties: -------------------------------------------------------------------------------- 1 | connector.name=memory 2 | memory.max-data-per-node=128MB 3 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'trino_utils_integration_tests' 2 | version: '0.1.0' 3 | config-version: 2 -------------------------------------------------------------------------------- /integration_tests/dbt_utils/data/sql/data_deduplicate_expected_trino.csv: -------------------------------------------------------------------------------- 1 | user_id,event,version,rn 2 | 1,play,2,1 3 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/data/sql/data_deduplicate_trino.csv: -------------------------------------------------------------------------------- 1 | user_id,event,version 2 | 1,play,1 3 | 1,play,2 4 | 2,pause,1 5 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/macros/limit_zero.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__limit_zero() %} 2 | {{ return('where 0=1') }} 3 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_utils/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../../ 3 | - 
local: ../../dbt-utils 4 | - local: ../../dbt-utils/integration_tests -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'trino_utils' 2 | version: '0.3.0' 3 | config-version: 2 4 | require-dbt-version: [">=1.3.0", "<2.0.0"] 5 | macro-paths: ["macros"] 6 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../../ 3 | - local: ../../dbt_metrics 4 | - local: ../../dbt_metrics/integration_tests 5 | -------------------------------------------------------------------------------- /macros/dbt_date/convert_timezone.sql: -------------------------------------------------------------------------------- 1 | {%- macro trino__convert_timezone(column, target_tz, source_tz=None) -%} 2 | {{ column }} at time zone '{{ target_tz }}' 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_date/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../../ 3 | - local: ../../dbt-date 4 | - local: ../../dbt-date/integration_tests 5 | - local: ../../dbt-utils 6 | -------------------------------------------------------------------------------- /docker/remove_trino.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | cd .. 
6 | docker-compose -f docker-compose-trino.yml down 7 | -------------------------------------------------------------------------------- /docker/remove_starburst.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | cd .. 6 | docker-compose -f docker-compose-starburst.yml down 7 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/materialized_models/trino__fact_orders.sql: -------------------------------------------------------------------------------- 1 | select 2 | * 3 | ,round(order_total - (order_total/2)) as discount_total 4 | from {{ref('trino__fact_orders_source')}} 5 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/models/sql/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_deduplicate_trino 5 | tests: 6 | - dbt_utils.equality: 7 | compare_model: ref('data_deduplicate_expected_trino') 8 | -------------------------------------------------------------------------------- /macros/dbt_utils/schema_cleanup/drop_schema_by_name.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__drop_schema_by_name(schema_name) %} 2 | {% set relation = api.Relation.create(database=target.database, schema=schema_name) %} 3 | {% do drop_schema(relation) %} 4 | {% endmacro %} -------------------------------------------------------------------------------- /docker/trino/etc/config.properties: -------------------------------------------------------------------------------- 1 | coordinator=true 2 | node-scheduler.include-coordinator=true 3 | http-server.http.port=8080 4 | query.max-memory=1GB 5 | query.max-memory-per-node=200MB 6 | discovery-server.enabled=true 7 | 
discovery.uri=http://localhost:8080 8 | -------------------------------------------------------------------------------- /docker/starburst/etc/config.properties: -------------------------------------------------------------------------------- 1 | coordinator=true 2 | node-scheduler.include-coordinator=true 3 | http-server.http.port=8080 4 | query.max-memory=1GB 5 | query.max-memory-per-node=200MB 6 | discovery-server.enabled=true 7 | discovery.uri=http://localhost:8080 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dbt-utils"] 2 | path = dbt-utils 3 | url = git@github.com:dbt-labs/dbt-utils.git 4 | [submodule "dbt-date"] 5 | path = dbt-date 6 | url = https://github.com/calogica/dbt-date.git 7 | [submodule "dbt_metrics"] 8 | path = dbt_metrics 9 | url = https://github.com/dbt-labs/dbt_metrics.git 10 | -------------------------------------------------------------------------------- /docker/dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | integration_tests: 2 | target: dev 3 | outputs: 4 | dev: 5 | type: trino 6 | method: none 7 | user: admin 8 | catalog: memory 9 | host: trino 10 | port: 8080 11 | schema: default 12 | threads: 1 13 | config: 14 | send_anonymous_usage_stats: False 15 | -------------------------------------------------------------------------------- /docker-compose-trino.yml: -------------------------------------------------------------------------------- 1 | version: "3.5" 2 | services: 3 | trino: 4 | ports: 5 | - "8080:8080" 6 | image: "trinodb/trino:423" 7 | volumes: 8 | - ./docker/trino/etc:/usr/lib/trino/etc:ro 9 | - ./docker/trino/catalog:/etc/trino/catalog 10 | 11 | networks: 12 | default: 13 | external: 14 | name: dbt-net 15 | -------------------------------------------------------------------------------- /docker-compose-starburst.yml: 
-------------------------------------------------------------------------------- 1 | version: "3.5" 2 | services: 3 | trino: 4 | ports: 5 | - "8080:8080" 6 | image: "starburstdata/starburst-enterprise:422-e" 7 | volumes: 8 | - ./docker/starburst/etc:/etc/starburst 9 | - ./docker/starburst/catalog:/etc/starburst/catalog 10 | 11 | networks: 12 | default: 13 | external: 14 | name: dbt-net 15 | -------------------------------------------------------------------------------- /docker/trino/etc/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx1G 3 | -XX:-UseBiasedLocking 4 | -XX:+UseG1GC 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+HeapDumpOnOutOfMemoryError 8 | -XX:+UseGCOverheadLimit 9 | -XX:+ExitOnOutOfMemoryError 10 | -XX:ReservedCodeCacheSize=256M 11 | -XX:-OmitStackTraceInFastThrow 12 | -Djdk.attach.allowAttachSelf=true 13 | -Djdk.nio.maxCachedBufferSize=2000000 -------------------------------------------------------------------------------- /docker/starburst/etc/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx1G 3 | -XX:-UseBiasedLocking 4 | -XX:+UseG1GC 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+HeapDumpOnOutOfMemoryError 8 | -XX:+UseGCOverheadLimit 9 | -XX:+ExitOnOutOfMemoryError 10 | -XX:ReservedCodeCacheSize=256M 11 | -XX:-OmitStackTraceInFastThrow 12 | -Djdk.attach.allowAttachSelf=true 13 | -Djdk.nio.maxCachedBufferSize=2000000 -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/base_count_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | metrics: 3 | - name: base_count_metric 4 | model: ref('trino__fact_orders') 5 | label: Total Discount ($) 6 | timestamp: order_date 7 | time_grains: [day, week, month] 8 | calculation_method: 
count 9 | expression: order_total 10 | dimensions: 11 | - had_discount 12 | - order_country 13 | -------------------------------------------------------------------------------- /integration_tests/ci/sample.profiles.yml: -------------------------------------------------------------------------------- 1 | # HEY! This file is used in the trino_utils integration tests in CI. 2 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 3 | 4 | config: 5 | send_anonymous_usage_stats: False 6 | use_colors: True 7 | 8 | integration_tests: 9 | target: trino 10 | outputs: 11 | 12 | trino: 13 | type: trino 14 | 15 | threads: 4 -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/case_when_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | metrics: 3 | - name: case_when_metric 4 | model: ref('trino__fact_orders') 5 | label: Order Total ($) 6 | timestamp: order_date 7 | time_grains: [day, week, month] 8 | calculation_method: sum 9 | expression: case when had_discount = true then 1 else 0 end 10 | dimensions: 11 | - order_country 12 | -------------------------------------------------------------------------------- /docker/init_trino.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | cd .. 6 | 7 | set -exo pipefail 8 | 9 | docker-compose -f docker-compose-trino.yml build 10 | docker-compose -f docker-compose-trino.yml up -d 11 | timeout 5m bash -c -- 'while ! 
docker-compose -f docker-compose-trino.yml logs trino 2>&1 | tail -n 1 | grep "SERVER STARTED"; do sleep 2; done' 12 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/base_average_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | metrics: 3 | - name: base_average_metric 4 | model: ref('trino__fact_orders') 5 | label: Total Discount ($) 6 | timestamp: order_date 7 | time_grains: [day, week, month, test] 8 | calculation_method: average 9 | expression: discount_total 10 | dimensions: 11 | - had_discount 12 | - order_country 13 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/base_median_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | metrics: 3 | - name: base_median_metric 4 | model: ref('trino__fact_orders') 5 | label: Total Discount ($) 6 | timestamp: order_date 7 | time_grains: [day, week, month, all_time] 8 | calculation_method: median 9 | expression: discount_total 10 | dimensions: 11 | - had_discount 12 | - order_country 13 | -------------------------------------------------------------------------------- /docker/init_starburst.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | cd .. 6 | 7 | set -exo pipefail 8 | 9 | docker-compose -f docker-compose-starburst.yml build 10 | docker-compose -f docker-compose-starburst.yml up -d 11 | timeout 5m bash -c -- 'while ! 
docker-compose -f docker-compose-starburst.yml logs trino 2>&1 | tail -n 1 | grep "SERVER STARTED"; do sleep 2; done' 12 | -------------------------------------------------------------------------------- /docker/run_dbt_date_integration_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | 6 | set -exo pipefail 7 | docker run \ 8 | --network="dbt-net" \ 9 | -v $PWD/dbt:/root/.dbt \ 10 | dbt-trino-utils \ 11 | "cd /opt/dbt_trino_utils/integration_tests/dbt_date \ 12 | && dbt deps \ 13 | && dbt seed \ 14 | && dbt run \ 15 | && dbt test" 16 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/models/sql/test_deduplicate_trino.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | source as ( 4 | select * 5 | from {{ ref('data_deduplicate_trino') }} 6 | where user_id = 1 7 | ), 8 | 9 | deduped as ( 10 | 11 | {{ 12 | dbt_utils.deduplicate( 13 | 'source', 14 | partition_by='user_id', 15 | order_by='version desc', 16 | ) | indent 17 | }} 18 | 19 | ) 20 | 21 | select * from deduped 22 | -------------------------------------------------------------------------------- /docker/run_dbt_metrics_integration_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | 6 | set -exo pipefail 7 | docker run \ 8 | --network="dbt-net" \ 9 | -v $PWD/dbt:/root/.dbt \ 10 | dbt-trino-utils \ 11 | "cd /opt/dbt_trino_utils/integration_tests/dbt_metrics \ 12 | && dbt deps \ 13 | && dbt seed \ 14 | && dbt run \ 15 | && dbt test" 16 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/seeds/source/trino__fact_orders_source.csv: 
-------------------------------------------------------------------------------- 1 | order_id,order_country,order_total,had_discount,customer_id,order_date 2 | 1,Russia,2,false,1,2022-01-28 3 | 2,Mauritius,1,false,2,2022-01-20 4 | 3,Peru,1,false,1,2022-01-13 5 | 4,Kazakhstan,1,true,3,2022-01-06 6 | 5,Portugal,1,false,4,2022-01-08 7 | 6,China,1,false,5,2022-01-21 8 | 7,Germany,1,true,2,2022-01-22 9 | 8,Greenland,1,true,1,2022-02-15 10 | 9,Bangladesh,1,false,2,2022-02-03 11 | 10,Sweden,1,false,3,2022-02-13 12 | -------------------------------------------------------------------------------- /macros/dbt_metrics/sql_gen/gen_calendar_join.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__gen_calendar_join(group_values) %} 2 | left join calendar 3 | {%- if group_values.window is not none %} 4 | on cast(base_model.{{group_values.timestamp}} as date) > date_add('{{group_values.window.period}}', -{{group_values.window.count}}, calendar.date_day) 5 | and cast(base_model.{{group_values.timestamp}} as date) <= calendar.date_day 6 | {%- else %} 7 | on cast(base_model.{{group_values.timestamp}} as date) = calendar.date_day 8 | {% endif -%} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /docker/run_dbt_utils_integration_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # move to wherever we are so docker things work 4 | cd "$(dirname "${BASH_SOURCE[0]}")" 5 | 6 | set -exo pipefail 7 | # run the dim_order_dates model two times in order to test incremental functionality 8 | # Do the same for the incremental model in custom schema 9 | docker run \ 10 | --network="dbt-net" \ 11 | -v $PWD/dbt:/root/.dbt \ 12 | dbt-trino-utils \ 13 | "cd /opt/dbt_trino_utils/integration_tests/dbt_utils \ 14 | && dbt deps \ 15 | && dbt seed \ 16 | && dbt run \ 17 | && dbt test 
-------------------------------------------------------------------------------- /macros/dbt_utils/sql/get_tables_by_pattern_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %} 2 | select distinct 3 | table_schema as {{ adapter.quote('table_schema') }}, 4 | table_name as {{ adapter.quote('table_name') }}, 5 | {{ dbt_utils.get_table_types_sql() }} 6 | from {{ database }}.information_schema.tables 7 | where lower(table_schema) like lower('{{ schema_pattern }}') 8 | and lower(table_name) like lower('{{ table_pattern }}') 9 | and lower(table_name) not like lower('{{ exclude }}') 10 | {% endmacro %} 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | dbt-trino-tests: 2 | docker network create dbt-net || true 3 | ./docker/dbt/build.sh 4 | ./docker/init_trino.bash 5 | ./docker/run_dbt_utils_integration_tests.bash 6 | ./docker/run_dbt_date_integration_tests.bash 7 | ./docker/run_dbt_metrics_integration_tests.bash 8 | ./docker/remove_trino.bash 9 | 10 | dbt-starburst-tests: 11 | docker network create dbt-net || true 12 | ./docker/dbt/build.sh 13 | ./docker/init_starburst.bash 14 | ./docker/run_dbt_utils_integration_tests.bash 15 | ./docker/run_dbt_date_integration_tests.bash 16 | ./docker/run_dbt_metrics_integration_tests.bash 17 | ./docker/remove_starburst.bash 18 | -------------------------------------------------------------------------------- /docker/dbt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.9-slim-bullseye 2 | USER root 3 | 4 | ENV DBT_DIR /opt/dbt_trino_utils 5 | 6 | RUN apt-get update && \ 7 | apt-get dist-upgrade -y && \ 8 | apt-get install -y --no-install-recommends \ 9 | netcat curl git ssh 
software-properties-common \ 10 | make build-essential ca-certificates libpq-dev && \ 11 | apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 12 | RUN pip3 install --upgrade pip 13 | RUN mkdir ${DBT_DIR} 14 | RUN pip install 'dbt-trino~=1.4.0' 15 | ADD . ${DBT_DIR} 16 | ADD docker/dbt/profiles.yml /root/.dbt/profiles.yml 17 | 18 | WORKDIR /usr/app 19 | 20 | ENTRYPOINT ["/bin/sh", "-c"] 21 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/base_count_distinct_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | metrics: 4 | - name: base_count_distinct_metric 5 | model: ref('trino__fact_orders') 6 | label: Count Distinct 7 | timestamp: order_date 8 | time_grains: [day, week, month] 9 | calculation_method: count_distinct 10 | expression: customer_id 11 | dimensions: 12 | - had_discount 13 | - order_country 14 | window: 15 | count: 14 16 | period: month 17 | filters: 18 | - field: had_discount 19 | operator: 'is' 20 | value: 'true' 21 | - field: order_country 22 | operator: '=' 23 | value: "'CA'" 24 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_testing_models/trino__simple_develop_metric.sql: -------------------------------------------------------------------------------- 1 | {% set my_metric_yml -%} 2 | 3 | metrics: 4 | - name: develop_metric 5 | model: ref('trino__fact_orders') 6 | label: Total Discount ($) 7 | timestamp: order_date 8 | time_grains: [day, week, month] 9 | calculation_method: average 10 | expression: discount_total 11 | dimensions: 12 | - had_discount 13 | - order_country 14 | config: 15 | treat_null_values_as_zero: false 16 | 17 | {%- endset %} 18 | 19 | select * 20 | from {{ metrics.develop( 21 | develop_yml=my_metric_yml, 22 | metric_list='develop_metric', 23 | grain='month' 24 | ) 25 | }} 26 | 
-------------------------------------------------------------------------------- /macros/dbt_utils/sql/deduplicate.sql: -------------------------------------------------------------------------------- 1 | {%- macro trino__deduplicate(relation, partition_by, order_by) -%} 2 | {# 3 | -- This is a temporary solution until https://github.com/trinodb/trino/issues/14455 is resolved 4 | -- It leaks the rn column compared to the NATURAL JOIN solution from dbt 5 | #} 6 | with row_numbered as ( 7 | select 8 | _inner.*, 9 | row_number() over ( 10 | partition by {{ partition_by }} 11 | order by {{ order_by }} 12 | ) as rn 13 | from {{ relation }} as _inner 14 | ) 15 | 16 | select 17 | distinct row_numbered.* 18 | from row_numbered where row_numbered.rn = 1 19 | 20 | {%- endmacro -%} 21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: dbt-trino tests 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths-ignore: 7 | - "**/*.md" 8 | pull_request: 9 | branches: 10 | - main 11 | paths-ignore: 12 | - "**/*.md" 13 | 14 | jobs: 15 | test: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | type: 21 | - "trino" 22 | - "starburst" 23 | 24 | steps: 25 | - name: Checkout 26 | uses: actions/checkout@v3 27 | with: 28 | submodules: 'true' 29 | 30 | - name: Setup Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: "3.11" 34 | 35 | - name: Run dbt-trino tests against ${{ matrix.type }} 36 | run: make dbt-${{ matrix.type }}-tests 37 | 38 | - name: Remove container on failure 39 | if: failure() 40 | run: ./docker/remove_${{ matrix.type }}.bash || true -------------------------------------------------------------------------------- /macros/dbt_utils/sql/date_spine.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__date_spine(datepart, 
start_date, end_date) %} 2 | 3 | 4 | {# call as follows: 5 | 6 | date_spine( 7 | "day", 8 | "to_date('01/01/2016', 'mm/dd/yyyy')", 9 | "dbt.dateadd(week, 1, current_date)" 10 | ) #} 11 | 12 | 13 | with rawdata as ( 14 | 15 | {{dbt_utils.generate_series( 16 | dbt_utils.get_intervals_between(start_date, end_date, datepart) 17 | )}} 18 | 19 | ), 20 | 21 | all_periods as ( 22 | 23 | select ( 24 | {{ 25 | dbt.dateadd( 26 | datepart, 27 | "row_number() over (order by 1) - 1", 28 | "cast(cast(" ~ start_date ~ " as date) as timestamp)" 29 | ) 30 | }} 31 | ) as date_{{datepart}} 32 | from rawdata 33 | 34 | ), 35 | 36 | filtered as ( 37 | 38 | select * 39 | from all_periods 40 | where date_{{datepart}} <= cast(cast({{ end_date }} as date) as timestamp) 41 | 42 | ) 43 | 44 | select * from filtered 45 | 46 | {% endmacro %} 47 | -------------------------------------------------------------------------------- /macros/dbt_utils/generic_tests/not_null_proportion.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__test_not_null_proportion(model, group_by_columns) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 4 | {% set at_least = kwargs.get('at_least', kwargs.get('arg')) %} 5 | {% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %} 6 | 7 | {% if group_by_columns|length() > 0 %} 8 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 9 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 10 | {% endif %} 11 | 12 | with validation as ( 13 | select 14 | {{select_gb_cols}} 15 | sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as {{dbt.type_numeric()}}) as not_null_proportion 16 | from {{ model }} 17 | {{groupby_gb_cols}} 18 | ), 19 | validation_errors as ( 20 | select 21 | {{select_gb_cols}} 22 | not_null_proportion 23 | from validation 24 | where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }} 25 | ) 26 | 
select 27 | * 28 | from validation_errors 29 | 30 | {% endmacro %} 31 | -------------------------------------------------------------------------------- /integration_tests/dbt_date/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "trino_utils_dbt_date_integration_tests" 2 | version: "1.0" 3 | 4 | profile: "integration_tests" 5 | 6 | config-version: 2 7 | 8 | model-paths: ["models"] 9 | test-paths: ["tests"] 10 | seed-paths: ["data"] 11 | macro-paths: ["macros"] 12 | 13 | target-path: "target" 14 | clean-targets: ["target", "dbt_modules", "dbt_packages"] 15 | 16 | dispatch: 17 | - macro_namespace: dbt_utils 18 | search_order: ['trino_utils_dbt_utils_integration_tests', 'trino_utils', 'dbt_utils'] 19 | - macro_namespace: dbt_date 20 | search_order: ['trino_utils_dbt_date_integration_tests', 'trino_utils', 'dbt_date'] 21 | 22 | vars: 23 | "dbt_date:time_zone": "UTC" 24 | 25 | quoting: 26 | database: false 27 | identifier: false 28 | schema: false 29 | 30 | models: 31 | dbt_date_integration_tests: 32 | schema: dbt_date_integration_tests 33 | materialized: table 34 | 35 | # TODO: Enable tests while adding Trino shims 36 | dates: 37 | +enabled: false 38 | dim_date: 39 | +enabled: false 40 | dim_date_fiscal: 41 | +enabled: false 42 | dim_hour: 43 | +enabled: false 44 | dim_week: 45 | +enabled: true 46 | test_dates: 47 | +enabled: false 48 | -------------------------------------------------------------------------------- /macros/dbt_utils/schema_cleanup/drop_schemas_by_prefixes.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__drop_schemas_by_prefixes(prefixes) %} 2 | {# Ensure input is a list to iterate later #} 3 | {% set prefix_list = [prefixes] if prefixes is string else prefixes %} 4 | 5 | {% for prefix in prefix_list %} 6 | {# Fetch all schemas that use the current prefix #} 7 | {% do log('Fetching schemas for ' + prefix + '...', info=True) %} 8 | {% set 
schemas_table %} 9 | select schema_name 10 | from "{{ target.database }}"."information_schema"."schemata" 11 | where schema_name LIKE '{{prefix}}%' 12 | {% endset %} 13 | {% set schema_names = run_query(schemas_table).columns[0].values() %} 14 | 15 | {# Test if results are empty #} 16 | {% if schema_names is none or schema_names|length == 0 %} 17 | {% do log('None found.', info=True) %} 18 | {% else %} 19 | {# Drop each found schema #} 20 | {% for schema_name in schema_names %} 21 | {% do log('Dropping schema ' + schema_name, info=True) %} 22 | {% do trino__drop_schema_by_name(schema_name) %} 23 | {% endfor %} 24 | {% endif %} 25 | {% endfor %} 26 | 27 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_testing_models/trino__develop_metric.sql: -------------------------------------------------------------------------------- 1 | {% set my_metric_yml -%} 2 | {% raw %} 3 | 4 | metrics: 5 | - name: develop_metric 6 | model: ref('trino__fact_orders') 7 | label: Total Discount ($) 8 | timestamp: order_date 9 | time_grains: [day, week, month] 10 | calculation_method: average 11 | expression: discount_total 12 | dimensions: 13 | - had_discount 14 | - order_country 15 | 16 | - name: derived_metric 17 | label: Total Discount ($) 18 | timestamp: order_date 19 | time_grains: [day, week, month] 20 | calculation_method: derived 21 | expression: "{{ metric('develop_metric') }} - 1 " 22 | dimensions: 23 | - had_discount 24 | - order_country 25 | 26 | - name: some_other_metric_not_using 27 | label: Total Discount ($) 28 | timestamp: order_date 29 | time_grains: [day, week, month] 30 | calculation_method: derived 31 | expression: "{{ metric('derived_metric') }} - 1 " 32 | dimensions: 33 | - had_discount 34 | - order_country 35 | 36 | {% endraw %} 37 | {%- endset %} 38 | 39 | select * 40 | from {{ metrics.develop( 41 | develop_yml=my_metric_yml, 42 | metric_list=['derived_metric'], 43 | 
grain='month' 44 | ) 45 | }} 46 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/metric_definitions/base_sum_metric.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | metrics: 3 | - name: base_sum_metric 4 | model: ref('trino__fact_orders') 5 | label: Order Total ($) 6 | timestamp: order_date 7 | time_grains: [day, week, month] 8 | calculation_method: sum 9 | expression: order_total 10 | dimensions: 11 | - had_discount 12 | - order_country 13 | config: 14 | restrict_no_time_grain: True 15 | 16 | - name: base_sum_metric_duplicate 17 | model: ref('fact_orders_duplicate') 18 | label: Order Total ($) 19 | timestamp: order_date 20 | time_grains: [day, week, month] 21 | calculation_method: sum 22 | expression: order_total 23 | dimensions: 24 | - had_discount 25 | - order_country 26 | 27 | - name: base_sum_metric__14_day_window 28 | model: ref('trino__fact_orders') 29 | label: Order Total ($) 30 | timestamp: order_date 31 | time_grains: [day, week, month] 32 | calculation_method: sum 33 | expression: order_total 34 | window: 35 | count: 14 36 | period: month 37 | dimensions: 38 | - had_discount 39 | - order_country 40 | 41 | - name: base_test_metric 42 | model: ref('fact_orders') 43 | label: Order Total ($) 44 | timestamp: order_date 45 | time_grains: [day, week, month] 46 | calculation_method: sum 47 | expression: order_total 48 | dimensions: 49 | - had_discount 50 | - order_country 51 | 52 | - name: base_sum_metric__no_timestamp 53 | model: ref('fact_orders') 54 | label: Order Total ($) 55 | calculation_method: sum 56 | expression: order_total 57 | dimensions: 58 | - had_discount 59 | - order_country 60 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 
'trino_utils_dbt_utils_integration_tests' 2 | version: '1.0' 3 | 4 | profile: 'integration_tests' 5 | 6 | # require-dbt-version: inherit this from dbt-utils 7 | 8 | config-version: 2 9 | 10 | model-paths: ["models"] 11 | analysis-paths: ["analysis"] 12 | test-paths: ["tests"] 13 | seed-paths: ["data"] 14 | macro-paths: ["macros"] 15 | 16 | target-path: "target" # directory which will store compiled SQL files 17 | clean-targets: # directories to be removed by `dbt clean` 18 | - "target" 19 | - "dbt_modules" 20 | - "dbt_packages" 21 | 22 | dispatch: 23 | - macro_namespace: dbt_utils 24 | search_order: ['trino_utils_dbt_utils_integration_tests', 'trino_utils', 'dbt_utils'] 25 | 26 | models: 27 | dbt_utils_integration_tests: 28 | +enabled: true 29 | 30 | cross_db_utils: 31 | test_datediff: 32 | # tested in adapter tests 33 | +enabled: false 34 | 35 | generic_tests: 36 | # does unsupported cast, check with dbt labs 37 | test_recency: 38 | +enabled: false 39 | 40 | materializations: 41 | # requires transactional support 42 | test_insert_by_period: 43 | +enabled: false 44 | 45 | sql: 46 | # requires natural join support, workaround leaking additional column 47 | test_deduplicate: 48 | +enabled: false 49 | test_deduplicate_deprecated: 50 | +enabled: false 51 | 52 | seeds: 53 | +quote_columns: false 54 | dbt_utils_integration_tests: 55 | sql: 56 | data_get_single_value: 57 | +column_types: 58 | date_value: timestamp 59 | float_value: double 60 | int_value: integer 61 | 62 | data_width_bucket: 63 | +column_types: 64 | num_buckets: integer 65 | min_value: double 66 | max_value: double 67 | 68 | schema_tests: 69 | data_test_mutually_exclusive_ranges_with_gaps: 70 | +enabled: false -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "trino_utils_dbt_metrics_integration_tests" 2 | version: "1.0.0" 3 | 
config-version: 2 4 | 5 | profile: "integration_tests" 6 | 7 | model-paths: ["models"] 8 | analysis-paths: ["analyses"] 9 | test-paths: ["tests"] 10 | seed-paths: ["seeds"] 11 | macro-paths: ["macros"] 12 | snapshot-paths: ["snapshots"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_packages" 18 | - "logs" 19 | 20 | dispatch: 21 | - macro_namespace: metrics 22 | search_order: ['trino_utils_dbt_metrics_integration_tests', 'trino_utils', 'metrics'] 23 | 24 | models: 25 | 26 | trino_utils_dbt_metrics_integration_tests: 27 | metric_testing_models: 28 | +materialized: table 29 | 30 | dbt_metrics_integration_tests: 31 | 32 | # Overridden by trino__custom_calendar 33 | custom_calendar: 34 | +enabled: false 35 | 36 | metric_testing_models: 37 | +materialized: table 38 | 39 | # no median function in Trino 40 | base_median_metric: 41 | +enabled: false 42 | base_median_metric_no_time_grain: 43 | +enabled: false 44 | 45 | # no 'is true' predicate in trino 46 | base_count_distinct_metric: 47 | +enabled: false 48 | derived_metric: 49 | +enabled: false 50 | 51 | # Overridden by trino__develop_metric 52 | develop_metric: 53 | +enabled: false 54 | # Overridden by trino__simple_develop_metric 55 | simple_develop_metric: 56 | +enabled: false 57 | 58 | # TODO: Fix and enable 59 | base_count_metric__secondary_calculations: 60 | +enabled: false 61 | base_sum_metric__prior: 62 | +enabled: false 63 | multiple_metrics__period_over_period: 64 | +enabled: false 65 | multiple_metrics__period_to_date: 66 | +enabled: false 67 | multiple_metrics__rolling: 68 | +enabled: false 69 | # issue with base_sum_metric.yml 70 | # with config: restrict_no_time_grain 71 | base_sum_metric: 72 | +enabled: false 73 | ratio_metric: 74 | +enabled: false 75 | 76 | materialized_models: 77 | +materialized: table 78 | 79 | vars: 80 | dbt_metrics_calendar_model: trino__custom_calendar 81 | custom_calendar_dimension_list: ["is_weekend"] 82 | 
-------------------------------------------------------------------------------- /macros/dbt_utils/generic_tests/equal_rowcount.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__test_equal_rowcount(model, compare_model, group_by_columns) %} 2 | 3 | {#-- Needs to be set at parse time, before we return '' below --#} 4 | {{ config(fail_calc = 'sum(coalesce(diff_count, 0))') }} 5 | 6 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 7 | {%- if not execute -%} 8 | {{ return('') }} 9 | {% endif %} 10 | 11 | {% if group_by_columns|length() > 0 %} 12 | {% set select_gb_cols = group_by_columns|join(', ') + ', ' %} 13 | {% set join_gb_cols %} 14 | {% for c in group_by_columns %} 15 | and a.{{c}} = b.{{c}} 16 | {% endfor %} 17 | {% endset %} 18 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 19 | {% endif %} 20 | 21 | {#-- We must add a fake join key in case additional grouping variables are not provided --#} 22 | {#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1 --#} 23 | {#-- The same logic is used in fewer_rows_than. In case of changes, maintain consistent logic --#} 24 | {% set group_by_columns = ['id_dbtutils_test_equal_rowcount'] + group_by_columns %} 25 | {#-- Instead of specifying columns names passed to GROUP BY clause, pass columns indexes. 
--#} 26 | {#-- This workaround is needed because Trino doesn't support lateral column aliasing--#} 27 | {% set groupby_gb_cols = dbt_utils.group_by(group_by_columns|length) %} 28 | 29 | with a as ( 30 | 31 | select 32 | {{select_gb_cols}} 33 | 1 as id_dbtutils_test_equal_rowcount, 34 | count(*) as count_a 35 | from {{ model }} 36 | {{groupby_gb_cols}} 37 | 38 | 39 | ), 40 | b as ( 41 | 42 | select 43 | {{select_gb_cols}} 44 | 1 as id_dbtutils_test_equal_rowcount, 45 | count(*) as count_b 46 | from {{ compare_model }} 47 | {{groupby_gb_cols}} 48 | 49 | ), 50 | final as ( 51 | 52 | select 53 | 54 | {% for c in group_by_columns -%} 55 | a.{{c}} as {{c}}_a, 56 | b.{{c}} as {{c}}_b, 57 | {% endfor %} 58 | 59 | count_a, 60 | count_b, 61 | abs(count_a - count_b) as diff_count 62 | 63 | from a 64 | full join b 65 | on 66 | a.id_dbtutils_test_equal_rowcount = b.id_dbtutils_test_equal_rowcount 67 | {{join_gb_cols}} 68 | 69 | 70 | ) 71 | 72 | select * from final 73 | 74 | {% endmacro %} 75 | -------------------------------------------------------------------------------- /macros/dbt_utils/schema_cleanup/drop_old_relations.sql: -------------------------------------------------------------------------------- 1 | -- based on the sqlserver_utils package 2 | 3 | {% macro trino__drop_old_relations(dry_run='false') %} 4 | {% if execute %} 5 | {% set current_models = [] %} 6 | {% for node in graph.nodes.values()|selectattr("resource_type", "in", ["model", "seed", "snapshot"])%} 7 | {% do current_models.append(node.name) %} 8 | {% endfor %} 9 | {% endif %} 10 | {% set cleanup_query %} 11 | with models_to_drop as ( 12 | select 13 | case 14 | when table_type = 'BASE TABLE' then 'TABLE' 15 | when table_type = 'VIEW' then 'VIEW' 16 | end as relation_type, 17 | CASE 18 | WHEN table_type = 'VIEW' THEN concat_ws('.', table_schema, table_name) 19 | ELSE concat_ws('.', table_catalog, table_schema, table_name) 20 | END as relation_name 21 | from 22 | "{{ target.database 
}}".information_schema.tables -- Added quotes for any whitespace in target db 23 | where 24 | table_schema like '{{ target.schema }}%' 25 | and table_name not in ( 26 | {%- for model in current_models -%} 27 | '{{ model }}' 28 | {%- if not loop.last -%} 29 | , 30 | {% endif %} 31 | {%- endfor -%}) 32 | ) 33 | select 34 | CONCAT( 'drop ' , relation_type , ' ' , relation_name , ';' ) as drop_commands 35 | from 36 | models_to_drop 37 | where 38 | CONCAT( 'drop ' , relation_type , ' ' , relation_name , ';' ) is not null 39 | {% endset %} 40 | 41 | {% do log(cleanup_query, info=True) %} 42 | {% set drop_commands = run_query(cleanup_query).columns[0].values() %} 43 | 44 | {% do log('dry_run: ' + dry_run|string, info=True) %} 45 | 46 | {% if drop_commands %} 47 | {% for drop_command in drop_commands %} 48 | {% do log(drop_command, info=True) %} 49 | {% if dry_run == 'false' %} 50 | {% do run_query(drop_command) %} 51 | {% endif %} 52 | {% endfor %} 53 | {% else %} 54 | {% do log('No relations to clean.', info=True) %} 55 | {% endif %} 56 | {%- endmacro -%} -------------------------------------------------------------------------------- /macros/dbt_utils/generic_tests/fewer_rows_than.sql: -------------------------------------------------------------------------------- 1 | {% macro trino__test_fewer_rows_than(model, compare_model, group_by_columns) %} 2 | 3 | {{ config(fail_calc = 'sum(coalesce(row_count_delta, 0))') }} 4 | 5 | {% if group_by_columns|length() > 0 %} 6 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 7 | {% set join_gb_cols %} 8 | {% for c in group_by_columns %} 9 | and a.{{c}} = b.{{c}} 10 | {% endfor %} 11 | {% endset %} 12 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 13 | {% endif %} 14 | 15 | {#-- We must add a fake join key in case additional grouping variables are not provided --#} 16 | {#-- Redshift does not allow for dynamically created join conditions (e.g. 
full join on 1 = 1 --#} 17 | {#-- The same logic is used in equal_rowcount. In case of changes, maintain consistent logic --#} 18 | {% set group_by_columns = ['id_dbtutils_test_fewer_rows_than'] + group_by_columns %} 19 | {#-- Instead of specifying columns names passed to GROUP BY clause, pass columns indexes. --#} 20 | {#-- This workaround is needed because Trino doesn't support lateral column aliasing--#} 21 | {% set groupby_gb_cols = dbt_utils.group_by(group_by_columns|length) %} 22 | 23 | 24 | with a as ( 25 | 26 | select 27 | {{select_gb_cols}} 28 | 1 as id_dbtutils_test_fewer_rows_than, 29 | count(*) as count_our_model 30 | from {{ model }} 31 | {{ groupby_gb_cols }} 32 | 33 | ), 34 | b as ( 35 | 36 | select 37 | {{select_gb_cols}} 38 | 1 as id_dbtutils_test_fewer_rows_than, 39 | count(*) as count_comparison_model 40 | from {{ compare_model }} 41 | {{ groupby_gb_cols }} 42 | 43 | ), 44 | counts as ( 45 | 46 | select 47 | 48 | {% for c in group_by_columns -%} 49 | a.{{c}} as {{c}}_a, 50 | b.{{c}} as {{c}}_b, 51 | {% endfor %} 52 | 53 | count_our_model, 54 | count_comparison_model 55 | from a 56 | full join b on 57 | a.id_dbtutils_test_fewer_rows_than = b.id_dbtutils_test_fewer_rows_than 58 | {{ join_gb_cols }} 59 | 60 | ), 61 | final as ( 62 | 63 | select *, 64 | case 65 | -- fail the test if we have more rows than the reference model and return the row count delta 66 | when count_our_model > count_comparison_model then (count_our_model - count_comparison_model) 67 | -- fail the test if they are the same number 68 | when count_our_model = count_comparison_model then 1 69 | -- pass the test if the delta is positive (i.e. 
return the number 0) 70 | else 0 71 | end as row_count_delta 72 | from counts 73 | 74 | ) 75 | 76 | select * from final 77 | 78 | {% endmacro %} 79 | -------------------------------------------------------------------------------- /integration_tests/dbt_metrics/models/trino__custom_calendar.sql: -------------------------------------------------------------------------------- 1 | with days as ( 2 | 3 | 4 | with rawdata as ( 5 | 6 | 7 | with p as ( 8 | select 0 as generated_number union all select 1 9 | ), unioned as ( 10 | 11 | select 12 | 13 | 14 | p0.generated_number * power(2, 0) 15 | + 16 | 17 | p1.generated_number * power(2, 1) 18 | + 19 | 20 | p2.generated_number * power(2, 2) 21 | + 22 | 23 | p3.generated_number * power(2, 3) 24 | + 25 | 26 | p4.generated_number * power(2, 4) 27 | + 28 | 29 | p5.generated_number * power(2, 5) 30 | + 31 | 32 | p6.generated_number * power(2, 6) 33 | + 34 | 35 | p7.generated_number * power(2, 7) 36 | + 37 | 38 | p8.generated_number * power(2, 8) 39 | + 40 | 41 | p9.generated_number * power(2, 9) 42 | + 43 | 44 | p10.generated_number * power(2, 10) 45 | + 46 | 47 | p11.generated_number * power(2, 11) 48 | + 49 | 50 | p12.generated_number * power(2, 12) 51 | + 52 | 53 | p13.generated_number * power(2, 13) 54 | 55 | 56 | + 1 57 | as generated_number 58 | 59 | from 60 | 61 | 62 | p as p0 63 | cross join 64 | 65 | p as p1 66 | cross join 67 | 68 | p as p2 69 | cross join 70 | 71 | p as p3 72 | cross join 73 | 74 | p as p4 75 | cross join 76 | 77 | p as p5 78 | cross join 79 | 80 | p as p6 81 | cross join 82 | 83 | p as p7 84 | cross join 85 | 86 | p as p8 87 | cross join 88 | 89 | p as p9 90 | cross join 91 | 92 | p as p10 93 | cross join 94 | 95 | p as p11 96 | cross join 97 | 98 | p as p12 99 | cross join 100 | 101 | p as p13 102 | 103 | ) 104 | 105 | select * 106 | from unioned 107 | where generated_number <= 14610 108 | order by generated_number 109 | 110 | ), 111 | 112 | all_periods as ( 113 | 114 | select ( 115 | 116 | 117 | 
date_add( 118 | 'day', 119 | row_number() over (order by 1) - 1, 120 | cast('1990-01-01' as date) 121 | ) 122 | 123 | 124 | ) as date_day 125 | from rawdata 126 | 127 | ), 128 | 129 | filtered as ( 130 | 131 | select * 132 | from all_periods 133 | where date_day <= cast('2030-01-01' as date) 134 | 135 | ) 136 | 137 | select * from filtered 138 | 139 | 140 | ), 141 | 142 | final as ( 143 | select 144 | cast(date_day as date) as date_day, 145 | cast({{ date_trunc('week', 'date_day') }} as date) as date_week, 146 | cast({{ date_trunc('month', 'date_day') }} as date) as date_month, 147 | cast({{ date_trunc('quarter', 'date_day') }} as date) as date_quarter, 148 | '2022-01-01' as date_test, 149 | cast({{ date_trunc('year', 'date_day') }} as date) as date_year, 150 | true as is_weekend 151 | from days 152 | ) 153 | 154 | select * from final 155 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `trino_utils` 2 | 3 | This [dbt](https://github.com/dbt-labs/dbt) package contains macros 4 | that: 5 | - can be (re)used across dbt projects running on Trino or Starburst databases 6 | - define implementations of [dispatched macros](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter/#dispatch) from other packages that can be used on Trino or Starburst databases 7 | 8 | ## Compatibility 9 | 10 | This package provides "shims" for: 11 | - [dbt_utils](https://github.com/dbt-labs/dbt-utils) (partial) 12 | - [dbt_date](https://github.com/calogica/dbt-date) (partial) 13 | 14 | 15 | ## Usage 16 | 17 | Wherever a custom trino macro exists, dbt_utils adapter dispatch will pass to trino_utils. This means you can just do `{{dbt_utils.hash('mycolumnname')}}` just like your friends with Snowflake. 18 | 19 | ## Installation Instructions 20 | 21 | To make use of these trino adaptations in your dbt project, you must do two things: 22 | 1. 
Install both `trino_utils` and any of the compatible packages listed above by adding them to your `packages.yml` 23 | ```yaml 24 | packages: 25 | - package: dbt-labs/dbt_utils 26 | version: {SEE DBT HUB FOR NEWEST VERSION} 27 | - package: starburstdata/trino_utils 28 | version: {SEE DBT HUB FOR NEWEST VERSION} 29 | ``` 30 | 2. Tell the supported package to also look for the `trino_utils` macros by adding the relevant `dispatches` to your `dbt_project.yml` 31 | ```yaml 32 | dispatch: 33 | - macro_namespace: dbt_utils 34 | search_order: ['trino_utils', 'dbt_utils'] 35 | - macro_namespace: dbt_date 36 | search_order: ['trino_utils', 'dbt_date'] 37 | - macro_namespace: metrics 38 | search_order: ['trino_utils', 'metrics'] 39 | ``` 40 | Check [dbt Hub](https://hub.getdbt.com) for the latest installation 41 | instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) 42 | for more information on installing packages. 43 | 44 | ## Updating dbt-utils submodule 45 | 46 | If a new version of dbt-utils is released and you want to use it, follow the update instructions below. 47 | Remember to always point HEAD to a specific tag in this submodule; never leave HEAD pointing to any branch. 48 | 49 | 1. `cd dbt-utils` go to dbt-utils submodule directory 50 | 2. `git switch main` switch to main branch 51 | 3. `git fetch --tags` fetch tags from dbt-utils remote 52 | 4. `git switch 1.0.0 --detach` switch to the tag of the version to which you want to upgrade, here 1.0.0 for example 53 | 5. `cd ..` go to dbt-trino-utils top directory. Commit, push this change to dbt-trino-utils remote. 54 | 55 | ## Release process 56 | 57 | 1. [Create a release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release) of the GitHub repository. Use [semantic versioning](https://semver.org/) to give an appropriate version number.
58 | Don't try to match [dbt-utils](https://github.com/dbt-labs/dbt-utils/releases) version number, as at some point these versions' numbers won't be equal anyway, that's inevitable. Just follow semantic versioning. 59 | Remember to add appropriate release notes (changelog, contributors). 60 | 2. Wait for your new release to be added to [dbt Hub](https://hub.getdbt.com/). 61 | It is done automatically by dbt-labs script [Hubcap](https://github.com/dbt-labs/hubcap#hubcap). 62 | Check on [trino_utils package site](https://hub.getdbt.com/starburstdata/trino_utils/latest/) if the new version is available. It should happen within one business day. 63 | 3. Announce new release on dbt Slack, at [db-presto-trino](https://getdbt.slack.com/archives/CNNPBQ24R) channel. 64 | 65 | ## trino_utils specific macros 66 | 67 | ### Cleanup Macros 68 | 69 | Some helper macros have been added to simplify development database cleanup. Usage is as follows: 70 | 71 | Drop all schemas for each prefix with the provided prefix list (dev and myschema being sample prefixes): 72 | ```bash 73 | dbt run-operation trino__drop_schemas_by_prefixes --args "{prefixes: ['dev', 'myschema']}" 74 | ``` 75 | 76 | Drop all schemas with the single provided prefix (dev being a sample prefix): 77 | ```bash 78 | dbt run-operation trino__drop_schemas_by_prefixes --args "{prefixes: myschema}" 79 | ``` 80 | 81 | Drop a schema with a specific name (myschema_seed being a sample schema name used in the project): 82 | ```bash 83 | dbt run-operation trino__drop_schema_by_name --args "{schema_name: myschema_seed}" 84 | ``` 85 | 86 | Drop any models that are no longer included in the project (dependent on the current target): 87 | ```bash 88 | dbt run-operation trino__drop_old_relations 89 | ``` 90 | or for a dry run to preview dropped models: 91 | ```bash 92 | dbt run-operation trino__drop_old_relations --args "{dry_run: true}" 93 | ``` 94 | --------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------