├── .gitignore
├── dbt
│   ├── dbt_packages
│   │   └── dbt_utils
│   │       ├── integration_tests
│   │       │   ├── data
│   │       │   │   ├── .gitkeep
│   │       │   │   ├── schema_tests
│   │       │   │   │   ├── data_test_accepted_range.csv
│   │       │   │   │   ├── data_cardinality_equality_a.csv
│   │       │   │   │   ├── data_test_at_least_one.csv
│   │       │   │   │   ├── data_test_equal_rowcount.csv
│   │       │   │   │   ├── data_test_not_constant.csv
│   │       │   │   │   ├── data_test_relationships_where_table_1.csv
│   │       │   │   │   ├── data_test_relationships_where_table_2.csv
│   │       │   │   │   ├── data_cardinality_equality_b.csv
│   │       │   │   │   ├── data_test_equality_a.csv
│   │       │   │   │   ├── data_test_equality_b.csv
│   │       │   │   │   ├── data_test_expression_is_true.csv
│   │       │   │   │   ├── data_test_fewer_rows_than_table_1.csv
│   │       │   │   │   ├── data_test_fewer_rows_than_table_2.csv
│   │       │   │   │   ├── data_test_mutually_exclusive_ranges_no_gaps.csv
│   │       │   │   │   ├── data_test_sequential_values.csv
│   │       │   │   │   ├── data_test_not_accepted_values.csv
│   │       │   │   │   ├── data_not_null_proportion.csv
│   │       │   │   │   ├── data_test_sequential_timestamps.csv
│   │       │   │   │   ├── data_test_mutually_exclusive_ranges_with_gaps.csv
│   │       │   │   │   ├── data_unique_combination_of_columns.csv
│   │       │   │   │   ├── data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv
│   │       │   │   │   ├── data_test_equality_floats_a.csv
│   │       │   │   │   ├── data_test_equality_floats_b.csv
│   │       │   │   │   ├── schema.yml
│   │       │   │   │   ├── data_test_equality_floats_columns_a.csv
│   │       │   │   │   └── data_test_equality_floats_columns_b.csv
│   │       │   │   ├── sql
│   │       │   │   │   ├── data_star_quote_identifiers.csv
│   │       │   │   │   ├── data_pivot_expected.csv
│   │       │   │   │   ├── data_deduplicate_expected.csv
│   │       │   │   │   ├── data_events_20180101.csv
│   │       │   │   │   ├── data_events_20180102.csv
│   │       │   │   │   ├── data_events_20180103.csv
│   │       │   │   │   ├── data_filtered_columns_in_relation_expected.csv
│   │       │   │   │   ├── data_pivot.csv
│   │       │   │   │   ├── data_star_expected.csv
│   │       │   │   │   ├── data_get_column_values_where_expected.csv
│   │       │   │   │   ├── data_star.csv
│   │       │   │   │   ├── data_pivot_expected_apostrophe.csv
│   │       │   │   │   ├── data_deduplicate.csv
│   │       │   │   │   ├── data_nullcheck_table.csv
│   │       │   │   │   ├── data_union_table_2.csv
│   │       │   │   │   ├── data_filtered_columns_in_relation.csv
│   │       │   │   │   ├── data_get_query_results_as_dict.csv
│   │       │   │   │   ├── data_union_table_1.csv
│   │       │   │   │   ├── data_safe_add.csv
│   │       │   │   │   ├── data_safe_subtract.csv
│   │       │   │   │   ├── data_star_aggregate.csv
│   │       │   │   │   ├── data_star_aggregate_expected.csv
│   │       │   │   │   ├── data_get_single_value.csv
│   │       │   │   │   ├── data_generate_series.csv
│   │       │   │   │   ├── data_get_column_values.csv
│   │       │   │   │   ├── data_safe_divide.csv
│   │       │   │   │   ├── data_union_events_expected.csv
│   │       │   │   │   ├── data_get_column_values_dropped.csv
│   │       │   │   │   ├── data_union_exclude_expected.csv
│   │       │   │   │   ├── data_star_prefix_suffix_expected.csv
│   │       │   │   │   ├── data_union_expected.csv
│   │       │   │   │   ├── data_safe_divide_denominator_expressions.csv
│   │       │   │   │   ├── data_safe_divide_numerator_expressions.csv
│   │       │   │   │   ├── data_unpivot.csv
│   │       │   │   │   ├── data_get_column_values_where.csv
│   │       │   │   │   ├── data_unpivot_bool.csv
│   │       │   │   │   ├── data_unpivot_quote.csv
│   │       │   │   │   ├── data_unpivot_expected.csv
│   │       │   │   │   ├── data_unpivot_quote_expected.csv
│   │       │   │   │   ├── data_unpivot_bool_expected.csv
│   │       │   │   │   ├── data_unpivot_original_api_expected.csv
│   │       │   │   │   ├── data_generate_surrogate_key.csv
│   │       │   │   │   └── data_width_bucket.csv
│   │       │   │   ├── geo
│   │       │   │   │   ├── data_haversine_km.csv
│   │       │   │   │   └── data_haversine_mi.csv
│   │       │   │   ├── datetime
│   │       │   │   │   └── data_date_spine.csv
│   │       │   │   └── web
│   │       │   │       ├── data_urls.csv
│   │       │   │       ├── data_url_host.csv
│   │       │   │       └── data_url_path.csv
│   │       │   ├── macros
│   │       │   │   ├── .gitkeep
│   │       │   │   ├── limit_zero.sql
│   │       │   │   ├── tests.sql
│   │       │   │   └── assert_equal_values.sql
│   │       │   ├── packages.yml
│   │       │   ├── .gitignore
│   │       │   ├── models
│   │       │   │   ├── sql
│   │       │   │   │   ├── test_union_where.sql
│   │       │   │   │   ├── test_union.sql
│   │       │   │   │   ├── test_union_base.sql
│   │       │   │   │   ├── test_union_where_base.sql
│   │       │   │   │   ├── test_union_no_source_column.sql
│   │       │   │   │   ├── test_union_exclude_lowercase.sql
│   │       │   │   │   ├── test_union_exclude_uppercase.sql
│   │       │   │   │   ├── test_union_exclude_base_lowercase.sql
│   │       │   │   │   ├── test_union_exclude_base_uppercase.sql
│   │       │   │   │   ├── test_safe_add.sql
│   │       │   │   │   ├── test_safe_subtract.sql
│   │       │   │   │   ├── test_star.sql
│   │       │   │   │   ├── test_star_uppercase.sql
│   │       │   │   │   ├── test_width_bucket.sql
│   │       │   │   │   ├── test_unpivot_quote.sql
│   │       │   │   │   ├── test_get_column_values_where.sql
│   │       │   │   │   ├── test_get_relations_by_prefix_and_union.sql
│   │       │   │   │   ├── test_star_no_columns.sql
│   │       │   │   │   ├── test_deduplicate.sql
│   │       │   │   │   ├── test_star_prefix_suffix.sql
│   │       │   │   │   ├── test_generate_surrogate_key.sql
│   │       │   │   │   ├── test_groupby.sql
│   │       │   │   │   ├── test_pivot.sql
│   │       │   │   │   ├── test_get_relations_by_pattern.sql
│   │       │   │   │   ├── test_star_quote_identifiers.sql
│   │       │   │   │   ├── test_generate_series.sql
│   │       │   │   │   ├── test_pivot_apostrophe.sql
│   │       │   │   │   ├── test_star_aggregate.sql
│   │       │   │   │   ├── test_get_filtered_columns_in_relation.sql
│   │       │   │   │   ├── test_nullcheck_table.sql
│   │       │   │   │   ├── test_get_column_values.sql
│   │       │   │   │   ├── test_not_empty_string_failing.sql
│   │       │   │   │   ├── test_unpivot_bool.sql
│   │       │   │   │   ├── test_safe_divide.sql
│   │       │   │   │   ├── test_not_empty_string_passing.sql
│   │       │   │   │   ├── test_unpivot.sql
│   │       │   │   │   ├── test_get_single_value_default.sql
│   │       │   │   │   └── test_get_single_value.sql
│   │       │   │   ├── generic_tests
│   │       │   │   │   ├── test_equal_rowcount.sql
│   │       │   │   │   ├── equality_less_columns.sql
│   │       │   │   │   ├── test_fewer_rows_than.sql
│   │       │   │   │   ├── recency_time_included.sql
│   │       │   │   │   ├── test_equal_column_subset.sql
│   │       │   │   │   └── recency_time_excluded.sql
│   │       │   │   ├── datetime
│   │       │   │   │   ├── schema.yml
│   │       │   │   │   └── test_date_spine.sql
│   │       │   │   ├── web
│   │       │   │   │   ├── test_url_host.sql
│   │       │   │   │   ├── test_url_path.sql
│   │       │   │   │   ├── test_urls.sql
│   │       │   │   │   └── schema.yml
│   │       │   │   └── geo
│   │       │   │       ├── schema.yml
│   │       │   │       ├── test_haversine_distance_km.sql
│   │       │   │       └── test_haversine_distance_mi.sql
│   │       │   ├── tests
│   │       │   │   ├── jinja_helpers
│   │       │   │   │   ├── assert_pretty_time_is_string.sql
│   │       │   │   │   ├── assert_pretty_output_msg_is_string.sql
│   │       │   │   │   └── test_slugify.sql
│   │       │   │   ├── sql
│   │       │   │   │   ├── test_get_single_value_multiple_rows.sql
│   │       │   │   │   └── test_get_column_values_use_default.sql
│   │       │   │   ├── generic
│   │       │   │   │   └── expect_table_columns_to_match_set.sql
│   │       │   │   └── assert_get_query_results_as_dict_objects_equal.sql
│   │       │   └── dbt_project.yml
│   │       ├── .github
│   │       │   ├── CODEOWNERS
│   │       │   ├── workflows
│   │       │   │   ├── stale.yml
│   │       │   │   ├── create-table-of-contents.yml
│   │       │   │   ├── triage-labels.yml
│   │       │   │   └── ci.yml
│   │       │   ├── ISSUE_TEMPLATE
│   │       │   │   ├── feature_request.md
│   │       │   │   ├── dbt_minor_release.md
│   │       │   │   ├── bug_report.md
│   │       │   │   └── utils_minor_release.md
│   │       │   └── pull_request_template.md
│   │       ├── supported_adapters.env
│   │       ├── run_functional_test.sh
│   │       ├── docker-compose.yml
│   │       ├── pytest.ini
│   │       ├── dbt_project.yml
│   │       ├── macros
│   │       │   ├── jinja_helpers
│   │       │   │   ├── log_info.sql
│   │       │   │   ├── pretty_log_format.sql
│   │       │   │   ├── pretty_time.sql
│   │       │   │   ├── _is_relation.sql
│   │       │   │   ├── slugify.sql
│   │       │   │   └── _is_ephemeral.sql
│   │       │   ├── sql
│   │       │   │   ├── safe_divide.sql
│   │       │   │   ├── groupby.sql
│   │       │   │   ├── nullcheck.sql
│   │       │   │   ├── nullcheck_table.sql
│   │       │   │   ├── get_tables_by_prefix_sql.sql
│   │       │   │   ├── safe_add.sql
│   │       │   │   ├── safe_subtract.sql
│   │       │   │   ├── surrogate_key.sql
│   │       │   │   ├── get_query_results_as_dict.sql
│   │       │   │   ├── generate_surrogate_key.sql
│   │       │   │   ├── get_filtered_columns_in_relation.sql
│   │       │   │   ├── get_single_value.sql
│   │       │   │   ├── width_bucket.sql
│   │       │   │   ├── get_relations_by_prefix.sql
│   │       │   │   ├── get_table_types_sql.sql
│   │       │   │   ├── get_relations_by_pattern.sql
│   │       │   │   ├── generate_series.sql
│   │       │   │   ├── date_spine.sql
│   │       │   │   ├── star.sql
│   │       │   │   ├── haversine_distance.sql
│   │       │   │   └── get_column_values.sql
│   │       │   ├── web
│   │       │   │   ├── get_url_parameter.sql
│   │       │   │   ├── get_url_host.sql
│   │       │   │   └── get_url_path.sql
│   │       │   └── generic_tests
│   │       │       ├── expression_is_true.sql
│   │       │       ├── not_constant.sql
│   │       │       ├── not_empty_string.sql
│   │       │       ├── not_accepted_values.sql
│   │       │       ├── cardinality_equality.sql
│   │       │       ├── unique_combination_of_columns.sql
│   │       │       ├── relationships_where.sql
│   │       │       ├── accepted_range.sql
│   │       │       ├── not_null_proportion.sql
│   │       │       ├── recency.sql
│   │       │       ├── at_least_one.sql
│   │       │       ├── sequential_values.sql
│   │       │       ├── equal_rowcount.sql
│   │       │       └── fewer_rows_than.sql
│   │       ├── .gitignore
│   │       ├── run_test.sh
│   │       ├── docs
│   │       │   └── decisions
│   │       │       ├── README.md
│   │       │       └── adr-0001-decision-record-format.md
│   │       ├── dev-requirements.txt
│   │       ├── Makefile
│   │       ├── RELEASE.md
│   │       └── tox.ini
│   ├── target
│   │   ├── graph.gpickle
│   │   ├── partial_parse.msgpack
│   │   ├── compiled
│   │   │   └── api_bi_project
│   │   │       └── models
│   │   │           ├── models.yml
│   │   │           │   ├── not_null_stg_customers_country.sql
│   │   │           │   ├── not_null_stg_payments_payment_id.sql
│   │   │           │   ├── not_null_stg_sessions_session_id.sql
│   │   │           │   ├── not_null_stg_customers_customer_id.sql
│   │   │           │   ├── not_null_fct_revenue_daily_order_day.sql
│   │   │           │   ├── not_null_fct_marketing_attribution_session_day.sql
│   │   │           │   ├── unique_stg_payments_payment_id.sql
│   │   │           │   ├── unique_stg_sessions_session_id.sql
│   │   │           │   ├── unique_stg_customers_customer_id.sql
│   │   │           │   ├── accepted_values_stg_payments_0a68bd20fe58431edb2dbf71cff25165.sql
│   │   │           │   └── accepted_values_stg_sessions_ff93056935f7dcedf7680c76caed8239.sql
│   │   │           ├── marts
│   │   │           │   ├── fct_revenue_daily.sql
│   │   │           │   ├── dim_customer.sql
│   │   │           │   └── fct_marketing_attribution.sql
│   │   │           └── staging
│   │   │               ├── stg_customers.sql
│   │   │               ├── stg_sessions.sql
│   │   │               └── stg_payments.sql
│   │   ├── semantic_manifest.json
│   │   └── run
│   │       └── api_bi_project
│   │           └── models
│   │               ├── models.yml
│   │               │   ├── not_null_stg_customers_country.sql
│   │               │   ├── not_null_stg_payments_payment_id.sql
│   │               │   ├── not_null_stg_sessions_session_id.sql
│   │               │   ├── not_null_stg_customers_customer_id.sql
│   │               │   ├── not_null_fct_revenue_daily_order_day.sql
│   │               │   ├── not_null_fct_marketing_attribution_session_day.sql
│   │               │   ├── unique_stg_payments_payment_id.sql
│   │               │   ├── unique_stg_sessions_session_id.sql
│   │               │   ├── unique_stg_customers_customer_id.sql
│   │               │   ├── accepted_values_stg_payments_0a68bd20fe58431edb2dbf71cff25165.sql
│   │               │   └── accepted_values_stg_sessions_ff93056935f7dcedf7680c76caed8239.sql
│   │               ├── marts
│   │               │   ├── fct_revenue_daily.sql
│   │               │   ├── dim_customer.sql
│   │               │   └── fct_marketing_attribution.sql
│   │               └── staging
│   │                   ├── stg_customers.sql
│   │                   ├── stg_sessions.sql
│   │                   └── stg_payments.sql
│   ├── models
│   │   ├── marts
│   │   │   ├── fct_revenue_daily.sql
│   │   │   ├── dim_customer.sql
│   │   │   └── fct_marketing_attribution.sql
│   │   ├── staging
│   │   │   ├── stg_customers.sql
│   │   │   ├── stg_sessions.sql
│   │   │   └── stg_payments.sql
│   │   └── models.yml
│   ├── profiles
│   │   ├── .user.yml
│   │   └── profiles.yml
│   ├── package-lock.yml
│   ├── packages.yml
│   └── dbt_project.yml
├── mock_api
│   └── requirements.txt
├── dashboard
│   └── basic dashboard.pbix
├── airflow
│   ├── requirements.txt
│   └── logs
│       └── dag_id=etl_api_to_bi
│           ├── run_id=scheduled__2025-09-15T210000+0000
│           │   └── task_id=dbt_build
│           │       └── attempt=1.log
│           ├── run_id=scheduled__2025-09-15T183000+0000
│           │   ├── task_id=extract_sessions
│           │   │   └── attempt=1.log
│           │   └── task_id=extract_customers
│           │       └── attempt=1.log
run_id=scheduled__2025-09-17T141500+0000 │ └── task_id=extract_sessions │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-17T154500+0000 │ └── task_id=extract_payments │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-17T161500+0000 │ └── task_id=extract_payments │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-19T010000+0000 │ └── task_id=extract_sessions │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-16T143000+0000 │ └── task_id=extract_customers │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-15T174500+0000 │ └── task_id=extract_sessions │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-15T214500+0000 │ ├── task_id=extract_payments │ │ └── attempt=1.log │ └── task_id=extract_sessions │ │ └── attempt=1.log │ ├── run_id=scheduled__2025-09-15T224500+0000 │ ├── task_id=extract_payments │ │ └── attempt=1.log │ └── task_id=extract_sessions │ │ └── attempt=1.log │ └── run_id=scheduled__2025-09-17T170000+0000 │ └── task_id=extract_payments │ └── attempt=1.log ├── .env └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/macros/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mock_api/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==3.0.3 2 | python-dateutil==2.9.0.post0 3 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dbt-labs/dbt-package-owners 2 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../ 4 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_accepted_range.csv: -------------------------------------------------------------------------------- 1 | id 2 | -1 3 | 11 -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star_quote_identifiers.csv: -------------------------------------------------------------------------------- 1 | column_one 2 | a 3 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_pivot_expected.csv: -------------------------------------------------------------------------------- 1 | size,red,blue 2 | S,1,1 3 | M,1,0 -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/supported_adapters.env: -------------------------------------------------------------------------------- 1 | SUPPORTED_ADAPTERS=postgres,snowflake,redshift,bigquery 2 | 
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_cardinality_equality_a.csv:
--------------------------------------------------------------------------------
1 | same_name
2 | 1
3 | 2
4 | 3
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_at_least_one.csv:
--------------------------------------------------------------------------------
1 | field,value
2 | a,1
3 | b,
4 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equal_rowcount.csv:
--------------------------------------------------------------------------------
1 | field
2 | 1
3 | 1
4 | 2
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_deduplicate_expected.csv:
--------------------------------------------------------------------------------
1 | user_id,event,version
2 | 1,play,2
3 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_events_20180101.csv:
--------------------------------------------------------------------------------
1 | user_id,event
2 | 1,play
3 | 2,pause
4 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_events_20180102.csv:
--------------------------------------------------------------------------------
1 | user_id,event
2 | 3,play
3 | 4,pause
4 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_events_20180103.csv:
--------------------------------------------------------------------------------
1 | user_id,event
2 | 5,play
3 | 6,pause
4 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_filtered_columns_in_relation_expected.csv:
--------------------------------------------------------------------------------
1 | field_2,field_3
2 | h,i
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_pivot.csv:
--------------------------------------------------------------------------------
1 | size,color
2 | S,red
3 | S,blue
4 | S,blue's
5 | M,red
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star_expected.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2
2 | a,b
3 | d,e
4 | g,h
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_column_values_where_expected.csv:
--------------------------------------------------------------------------------
1 | field
2 | a
3 | c
4 | e
5 | g
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2,field_3
2 | a,b,c
3 | d,e,f
4 | g,h,i
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/run_functional_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python3 -m pytest tests/functional -n4 --profile $1
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_not_constant.csv:
--------------------------------------------------------------------------------
1 | col_a,field
2 | 1,1
3 | 1,1
4 | 1,2
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_relationships_where_table_1.csv:
--------------------------------------------------------------------------------
1 | id
2 | 1
3 | 2
4 | 3
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_relationships_where_table_2.csv:
--------------------------------------------------------------------------------
1 | id
2 | 1
3 | 2
4 | 4
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_pivot_expected_apostrophe.csv:
--------------------------------------------------------------------------------
1 | size,red,blue,blues
2 | S,1,1,1
3 | M,1,0,0
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | dbt_modules/
3 | logs/
4 | .env/
5 | profiles.yml
6 | package-lock.yml
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_cardinality_equality_b.csv:
--------------------------------------------------------------------------------
1 | same_name,different_name
2 | 1,2
3 | 2,3
4 | 3,1
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_a.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b,col_c
2 | 1,1,3
3 | 1,2,1
4 | 2,3,3
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_b.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b,col_c
2 | 1,1,2
3 | 1,2,2
4 | 2,3,2
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_expression_is_true.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b
2 | 0,1
3 | 1,0
4 | 0.5,0.5
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv:
--------------------------------------------------------------------------------
1 | col_a,field
2 | 1,1
3 | 1,2
4 | 1,3
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_deduplicate.csv:
--------------------------------------------------------------------------------
1 | user_id,event,version
2 | 1,play,1
3 | 1,play,2
4 | 2,pause,1
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_nullcheck_table.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2,field_3
2 | a,'',1
3 | '',b,2
4 | '','',3
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_union_table_2.csv:
--------------------------------------------------------------------------------
1 | id,favorite_color,favorite_number
2 | 1,green,7
3 | 2,pink,13
4 |
--------------------------------------------------------------------------------
/dbt/target/graph.gpickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejasono/data-pipeline-airflow-api-postgres-dbt-powerbi/HEAD/dbt/target/graph.gpickle
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv:
--------------------------------------------------------------------------------
1 | col_a,field
2 | 1,1
3 | 1,2
4 | 1,3
5 | 1,4
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_filtered_columns_in_relation.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2,field_3
2 | a,b,c
3 | d,e,f
4 | g,h,i
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_query_results_as_dict.csv:
--------------------------------------------------------------------------------
1 | col_1,col_2,col_3
2 | 1,a,True
3 | 2,b,False
4 | 3,c,
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_union_table_1.csv:
--------------------------------------------------------------------------------
1 | id,name,favorite_number
2 | 1,drew,pi
3 | 2,bob,e
4 | 3,alice,4
5 |
--------------------------------------------------------------------------------
/dashboard/basic dashboard.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejasono/data-pipeline-airflow-api-postgres-dbt-powerbi/HEAD/dashboard/basic dashboard.pbix
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_safe_add.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2,field_3,expected
2 | 1,2,3,6
3 | 1,,3,4
4 | ,,2,2
5 | ,,,0
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/geo/data_haversine_km.csv:
--------------------------------------------------------------------------------
1 | lat_1,lon_1,lat_2,lon_2,output
2 | 48.864716,2.349014,52.379189,4.899431,430
3 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/geo/data_haversine_mi.csv:
--------------------------------------------------------------------------------
1 | lat_1,lon_1,lat_2,lon_2,output
2 | 48.864716,2.349014,52.379189,4.899431,267
3 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_safe_subtract.csv:
--------------------------------------------------------------------------------
1 | field_1,field_2,field_3,expected
2 | 3,2,1,0
3 | 4,,3,1
4 | ,,2,-2
5 | ,,,0
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star_aggregate.csv:
--------------------------------------------------------------------------------
1 | group_field_1,group_field_2,value_field
2 | a,b,1
3 | a,b,2
4 | c,d,3
5 | c,e,4
--------------------------------------------------------------------------------
/dbt/target/partial_parse.msgpack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejasono/data-pipeline-airflow-api-postgres-dbt-powerbi/HEAD/dbt/target/partial_parse.msgpack
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_no_gaps.csv:
--------------------------------------------------------------------------------
1 | lower_bound,upper_bound
2 | 0,1
3 | 1,2
4 | 2,4
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_sequential_values.csv:
--------------------------------------------------------------------------------
1 | col_a,my_even_sequence
2 | 1,2
3 | 1,4
4 | 1,6
5 | 2,8
6 | 2,10
7 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star_aggregate_expected.csv:
--------------------------------------------------------------------------------
1 | group_field_1,group_field_2,value_field_sum
2 | a,b,3
3 | c,d,3
4 | c,e,4
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_not_accepted_values.csv:
--------------------------------------------------------------------------------
1 | id,city
2 | 1,Barcelona
3 | 2,London
4 | 3,Paris
5 | 4,New York
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_single_value.csv:
--------------------------------------------------------------------------------
1 | date_value,float_value,int_value,string_value
2 | 2017-01-01 00:00:00,3.3,19,string_a
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_generate_series.csv:
--------------------------------------------------------------------------------
1 | generated_number
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | 10
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_column_values.csv:
--------------------------------------------------------------------------------
1 | field
2 | a
3 | b
4 | c
5 | d
6 | e
7 | f
8 | g
9 | g
10 | g
11 | g
12 | g
13 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_safe_divide.csv:
--------------------------------------------------------------------------------
1 | numerator,denominator,output
2 | 6,0,
3 | 10,5,2
4 | ,,
5 | ,0,
6 | 17,,
7 | 0,,
8 | ,9,
9 | 0,5,0
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_union_events_expected.csv:
--------------------------------------------------------------------------------
1 | user_id,event
2 | 1,play
3 | 2,pause
4 | 3,play
5 | 4,pause
6 | 5,play
7 | 6,pause
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_column_values_dropped.csv:
--------------------------------------------------------------------------------
1 | field
2 | a
3 | b
4 | c
5 | d
6 | e
7 | f
8 | g
9 | g
10 | g
11 | g
12 | g
13 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_where.sql:
--------------------------------------------------------------------------------
1 | select
2 |     id,
3 |     favorite_number
4 | from
5 |     {{ ref('test_union_where_base') }}
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_union_exclude_expected.csv:
--------------------------------------------------------------------------------
1 | id,favorite_color,favorite_number
2 | 1,,pi
3 | 2,,e
4 | 3,,4
5 | 1,"green",7
6 | 2,"pink",13
7 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_not_null_proportion.csv:
--------------------------------------------------------------------------------
1 | point_5,point_9
2 | 1,1
3 | ,2
4 | ,3
5 | 4,4
6 | 5,5
7 | 6,6
8 | ,7
9 | ,8
10 | ,
11 | 10,10
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_star_prefix_suffix_expected.csv:
--------------------------------------------------------------------------------
1 | prefix_field_1_suffix,prefix_field_2_suffix,prefix_field_3_suffix
2 | a,b,c
3 | d,e,f
4 | g,h,i
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_sequential_timestamps.csv:
--------------------------------------------------------------------------------
1 | my_timestamp
2 | 2021-01-01 00:00
3 | 2021-01-01 01:00
4 | 2021-01-01 02:00
5 | 2021-01-01 03:00
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_union_expected.csv:
--------------------------------------------------------------------------------
1 | id,name,favorite_color,favorite_number
2 | 1,"drew",,pi
3 | 2,"bob",,e
4 | 3,"alice",,4
5 | 1,,"green",7
6 | 2,,"pink",13
7 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union.sql:
--------------------------------------------------------------------------------
1 |
2 | select
3 |     id,
4 |     name,
5 |     favorite_color,
6 |     favorite_number
7 |
8 | from {{ ref('test_union_base') }}
9 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_base.sql:
--------------------------------------------------------------------------------
1 |
2 | {{ dbt_utils.union_relations([
3 |     ref('data_union_table_1'),
4 |     ref('data_union_table_2')]
5 |     ) }}
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_safe_divide_denominator_expressions.csv:
--------------------------------------------------------------------------------
1 | numerator,denominator_1,denominator_2,output
2 | ,0,4,
3 | 6,3,2,1
4 | 0,2,6,0
5 | 0,,8,
6 | 5,,2,
7 | 4,0,4,
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_safe_divide_numerator_expressions.csv:
--------------------------------------------------------------------------------
1 | numerator_1,numerator_2,denominator,output
2 | 0,5,9,0
3 | 2,3,0,
4 | 0,0,0,
5 | 3,4,,
6 | ,6,14,
7 | 2,5,2,5
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_where_base.sql:
--------------------------------------------------------------------------------
1 | {{ dbt_utils.union_relations(
2 |     [ref('data_union_table_1'), ref('data_union_table_2')],
3 |     where="id = 1"
4 | ) }}
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 | services:
3 |   postgres:
4 |     image: cimg/postgres:9.6
5 |     environment:
6 |       - POSTGRES_USER=root
7 |     ports:
8 |       - "5432:5432"
9 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/test_equal_rowcount.sql:
--------------------------------------------------------------------------------
1 | with data as (
2 |
3 |     select * from {{ ref('data_test_equal_rowcount') }}
4 |
5 | )
6 |
7 | select
8 |     field
9 | from data
--------------------------------------------------------------------------------
/airflow/requirements.txt:
--------------------------------------------------------------------------------
1 | apache-airflow-providers-http==4.12.0
2 | apache-airflow-providers-postgres==5.11.2
3 | dbt-core==1.8.2
4 | dbt-postgres==1.8.2
5 | requests==2.31.0
6 | python-dateutil==2.9.0.post0
7 | psycopg2-binary
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/datetime/data_date_spine.csv:
--------------------------------------------------------------------------------
1 | date_day
2 | 2018-01-01
3 | 2018-01-02
4 | 2018-01-03
5 | 2018-01-04
6 | 2018-01-05
7 | 2018-01-06
8 | 2018-01-07
9 | 2018-01-08
10 | 2018-01-09
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/web/data_urls.csv:
--------------------------------------------------------------------------------
1 | url,medium,source
2 | http://drewbanin.com/milky?utm_medium=organic,organic,
3 | http://drewbanin.com/milky?utm_medium=organic&utm_source=github,organic,github
4 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot.csv:
--------------------------------------------------------------------------------
1 | customer_id,created_at,status,segment,name
2 | 123,2017-01-01,active,tier 1,name 1
3 | 234,2017-02-01,active,tier 3,name 3
4 | 567,2017-03-01,churned,tier 2,name 2
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/datetime/schema.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 |   - name: test_date_spine
5 |     data_tests:
6 |       - dbt_utils.equality:
7 |           compare_model: ref('data_date_spine')
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_get_column_values_where.csv:
--------------------------------------------------------------------------------
1 | field,condition
2 | a,left
3 | b,right
4 | c,left
5 | d,right
6 | e,left
7 | f,right
8 | g,left
9 | g,right
10 | g,left
11 | g,right
12 | g,left
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_bool.csv:
--------------------------------------------------------------------------------
1 | customer_id,created_at,status,segment,is_updated
2 | 123,2017-01-01,active,tier 1,TRUE
3 | 234,2017-02-01,active,tier 3,FALSE
4 | 567,2017-03-01,churned,tier 2,
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/equality_less_columns.sql:
--------------------------------------------------------------------------------
1 | with data as (
2 |
3 |     select * from {{ ref('data_test_equality_b') }}
4 |
5 | )
6 |
7 | select
8 |     col_a, col_b
9 | from data
10 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/test_fewer_rows_than.sql:
--------------------------------------------------------------------------------
1 | with data as (
2 |
3 |     select * from {{ ref('data_test_fewer_rows_than_table_1') }}
4 |
5 | )
6 |
7 | select
8 |     col_a, field
9 | from data
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_stg_customers_country.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select country
8 | from "db"."public_staging"."stg_customers"
9 | where country is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_quote.csv:
--------------------------------------------------------------------------------
1 | Customer_Id,Created_At,sTaTuS,SEGMENT,Name
2 | 123,2017-01-01,active,tier 1,name 1
3 | 234,2017-02-01,active,tier 3,name 3
4 | 567,2017-03-01,churned,tier 2,name 2
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_no_source_column.sql:
--------------------------------------------------------------------------------
1 | {{ dbt_utils.union_relations([
2 |     ref('data_union_table_1'),
3 |     ref('data_union_table_2')
4 |     ],
5 |     source_column_name = none
6 | ) }}
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_stg_payments_payment_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select payment_id
8 | from "db"."public_staging"."stg_payments"
9 | where payment_id is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_stg_sessions_session_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select session_id
8 | from "db"."public_staging"."stg_sessions"
9 | where session_id is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_stg_customers_customer_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select customer_id
8 | from "db"."public_staging"."stg_customers"
9 | where customer_id is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps.csv:
--------------------------------------------------------------------------------
1 | subscription_id,valid_from,valid_to
2 | 1,2019-01-01,2019-02-01
3 | 1,2019-03-03,2019-04-01
4 | 2,2019-05-06,2019-07-02
5 | 2,2019-07-03,
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/recency_time_included.sql:
--------------------------------------------------------------------------------
1 | select
2 |     1 as col1,
3 |     2 as col2,
4 |     cast({{ dbt.dateadd('hour', -23, dbt.current_timestamp()) }} as {{ dbt.type_timestamp() }}) as created_at
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/test_equal_column_subset.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized='ephemeral') }}
2 |
3 | select
4 |
5 |     first_name,
6 |     last_name,
7 |     email
8 |
9 | from {{ ref('data_people') }}
10 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_fct_revenue_daily_order_day.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select order_day
8 | from "db"."public_analytics"."fct_revenue_daily"
9 | where order_day is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_exclude_lowercase.sql:
--------------------------------------------------------------------------------
1 | select
2 |     {{ dbt_utils.star(ref("test_union_exclude_base_lowercase"), except=["_dbt_source_relation"]) }}
3 |
4 | from {{ ref("test_union_exclude_base_lowercase") }}
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_exclude_uppercase.sql:
--------------------------------------------------------------------------------
1 | select
2 |     {{ dbt_utils.star(ref("test_union_exclude_base_uppercase"), except=["_DBT_SOURCE_RELATION"]) }}
3 |
4 | from {{ ref("test_union_exclude_base_uppercase") }}
5 |
--------------------------------------------------------------------------------
/dbt/target/semantic_manifest.json:
--------------------------------------------------------------------------------
1 | {"semantic_models": [], "metrics": [], "project_configuration": {"time_spine_table_configurations": [], "metadata": null, "dsi_package_version": {"major_version": "0", "minor_version": "5", "patch_version": "1"}}, "saved_queries": []}
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 |     ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning
4 |     ignore:unclosed file .*:ResourceWarning
5 | env_files =
6 |     test.env
7 | testpaths =
8 |     tests/functional
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_exclude_base_lowercase.sql:
--------------------------------------------------------------------------------
1 |
2 | {{ dbt_utils.union_relations(
3 |     relations=[
4 |         ref('data_union_table_1'),
5 |         ref('data_union_table_2'),
6 |     ],
7 |     exclude=['name']
8 | ) }}
9 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_union_exclude_base_uppercase.sql:
--------------------------------------------------------------------------------
1 |
2 | {{ dbt_utils.union_relations(
3 |     relations=[
4 |         ref('data_union_table_1'),
5 |         ref('data_union_table_2'),
6 |     ],
7 |     exclude=['NAME']
8 | ) }}
9 |
--------------------------------------------------------------------------------
/dbt/models/marts/fct_revenue_daily.sql:
--------------------------------------------------------------------------------
1 | with p as (
2 |     select * from {{ ref('stg_payments') }}
3 | )
4 | select
5 |     order_day,
6 |     product,
7 |     country,
8 |     sum(net_revenue) as net_revenue,
9 |     count(*) as orders
10 | from p
11 | group by 1,2,3
12 |
13 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/not_null_fct_marketing_attribution_session_day.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | select session_day
8 | from "db"."public_analytics"."fct_marketing_attribution"
9 | where session_day is null
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dbt/models/marts/dim_customer.sql:
--------------------------------------------------------------------------------
1 | with c as (
2 |     select * from {{ ref('stg_customers') }}
3 | )
4 | select
5 |     customer_id,
6 |     company_name,
7 |     country,
8 |     industry,
9 |     company_size,
10 |     signup_date,
11 |     is_churned,
12 |     signup_month
13 | from c
14 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_unique_combination_of_columns.csv:
--------------------------------------------------------------------------------
1 | month,product,revenue
2 | 2019-01-01,jaffle,500
3 | 2019-01-01,lamington,100
4 | 2019-01-01,pavlova,600
5 | 2019-02-01,jaffle,300
6 | 2019-02-01,lamington,300
7 | 2019-02-01,pavlova,400
8 |
--------------------------------------------------------------------------------
/dbt/profiles/.user.yml:
--------------------------------------------------------------------------------
1 | # .user.yml stores a unique identifier that dbt uses for anonymous usage
2 | # tracking and distinguishing between different users or environments.
3 | # It has no effect on model execution and can usually be ignored.
4 | id: f92d6d58-4ecd-4709-8912-8cbe81e469cf
5 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_safe_add.sql:
--------------------------------------------------------------------------------
1 |
2 | with data as (
3 |
4 |     select * from {{ ref('data_safe_add') }}
5 |
6 | )
7 |
8 | select
9 |     {{ dbt_utils.safe_add(['field_1', 'field_2', 'field_3']) }} as actual,
10 |     expected
11 |
12 | from data
13 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/marts/fct_revenue_daily.sql:
--------------------------------------------------------------------------------
1 | with p as (
2 |     select * from "db"."public_staging"."stg_payments"
3 | )
4 | select
5 |     order_day,
6 |     product,
7 |     country,
8 |     sum(net_revenue) as net_revenue,
9 |     count(*) as orders
10 | from p
11 | group by 1,2,3
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | API_KEY=changeme
2 | API_RATE_LIMIT_PER_MIN=60
3 | POSTGRES_USER=db_user
4 | POSTGRES_PASSWORD=db_password
5 | POSTGRES_DB=db
6 | API_BASE_URL=http://mock-api:8000
7 | TZ=Europe/Berlin
8 | AIRFLOW_CONSTRAINTS_URL=https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt
9 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/web/test_url_host.sql:
--------------------------------------------------------------------------------
1 | with data as (
2 |
3 |     select * from {{ref('data_url_host')}}
4 |
5 | )
6 |
7 | select
8 |
9 |     {{ dbt_utils.get_url_host('original_url') }} as actual,
10 |     parsed_url as expected
11 |
12 | from data
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: 'dbt_utils'
2 | version: '0.1.0'
3 |
4 | require-dbt-version: [">=1.3.0", "<2.0.0"]
5 |
6 | config-version: 2
7 |
8 | target-path: "target"
9 | clean-targets: ["target", "dbt_modules", "dbt_packages"]
10 | macro-paths: ["macros"]
11 | log-path: "logs"
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_safe_subtract.sql:
--------------------------------------------------------------------------------
1 |
2 | with data as (
3 |
4 |     select * from {{ ref('data_safe_subtract') }}
5 |
6 | )
7 |
8 | select
9 |     {{ dbt_utils.safe_subtract(['field_1', 'field_2', 'field_3']) }} as actual,
10 |     expected
11 |
12 | from data
13 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/jinja_helpers/log_info.sql:
--------------------------------------------------------------------------------
1 | {% macro log_info(message) %}
2 |     {{ return(adapter.dispatch('log_info', 'dbt_utils')(message)) }}
3 | {% endmacro %}
4 |
5 | {% macro default__log_info(message) %}
6 |     {{ log(dbt_utils.pretty_log_format(message), info=True) }}
7 | {% endmacro %}
8 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/marts/dim_customer.sql:
--------------------------------------------------------------------------------
1 | with c as (
2 |     select * from "db"."public_staging"."stg_customers"
3 | )
4 | select
5 |     customer_id,
6 |     company_name,
7 |     country,
8 |     industry,
9 |     company_size,
10 |     signup_date,
11 |     is_churned,
12 |     signup_month
13 | from c
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star.sql:
--------------------------------------------------------------------------------
1 | {% set exclude_field = 'field_3' %}
2 |
3 |
4 | with data as (
5 |
6 |     select
7 |         {{ dbt_utils.star(from=ref('data_star'), except=[exclude_field]) }}
8 |
9 |     from {{ ref('data_star') }}
10 |
11 | )
12 |
13 | select * from data
14 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/web/test_url_path.sql:
--------------------------------------------------------------------------------
1 | with data as (
2 |
3 |     select * from {{ref('data_url_path')}}
4 |
5 | )
6 |
7 | select
8 |
9 |     coalesce({{ dbt_utils.get_url_path('original_url') }}, '') as actual,
10 |     coalesce(parsed_path, '') as expected
11 |
12 | from data
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star_uppercase.sql:
--------------------------------------------------------------------------------
1 | {% set exclude_field = 'FIELD_3' %}
2 |
3 |
4 | with data as (
5 |
6 |     select
7 |         {{ dbt_utils.star(from=ref('data_star'), except=[exclude_field]) }}
8 |
9 |     from {{ ref('data_star') }}
10 |
11 | )
12 |
13 | select * from data
14 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/tests/jinja_helpers/assert_pretty_time_is_string.sql:
--------------------------------------------------------------------------------
1 | {% if dbt_utils.pretty_time() is string %}
2 |     {# Return 0 rows for the test to pass #}
3 |     select 1 as col_name {{ limit_zero() }}
4 | {% else %}
5 |     {# Return >0 rows for the test to fail #}
6 |     select 1
7 | {% endif %}
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_width_bucket.sql:
--------------------------------------------------------------------------------
1 |
2 | with data as (
3 |
4 |     select * from {{ ref('data_width_bucket') }}
5 |
6 | )
7 |
8 | select
9 |     {{ dbt_utils.width_bucket('amount', 'min_value', 'max_value', 'num_buckets') }} as actual,
10 |     bucket as expected
11 |
12 | from data
13 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_expected.csv:
--------------------------------------------------------------------------------
1 | customer_id,created_at,prop,val
2 | 123,"2017-01-01","segment","tier 1"
3 | 123,"2017-01-01","status","active"
4 | 234,"2017-02-01","segment","tier 3"
5 | 234,"2017-02-01","status","active"
6 | 567,"2017-03-01","status","churned"
7 | 567,"2017-03-01","segment","tier 2"
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/tests/jinja_helpers/assert_pretty_output_msg_is_string.sql:
--------------------------------------------------------------------------------
1 | {% if dbt_utils.pretty_log_format() is string %}
2 |     {# Return 0 rows for the test to pass #}
3 |     select 1 as col_name {{ limit_zero() }}
4 | {% else %}
5 |     {# Return >0 rows for the test to fail #}
6 |     select 1
7 | {% endif %}
8 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/unique_stg_payments_payment_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | select
6 |     payment_id as unique_field,
7 |     count(*) as n_records
8 |
9 | from "db"."public_staging"."stg_payments"
10 | where payment_id is not null
11 | group by payment_id
12 | having count(*) > 1
13 |
14 |
15 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/unique_stg_sessions_session_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | select
6 |     session_id as unique_field,
7 |     count(*) as n_records
8 |
9 | from "db"."public_staging"."stg_sessions"
10 | where session_id is not null
11 | group by session_id
12 | having count(*) > 1
13 |
14 |
15 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/macros/limit_zero.sql:
--------------------------------------------------------------------------------
1 | {% macro my_custom_macro() %}
2 | whatever
3 | {% endmacro %}
4 |
5 | {% macro limit_zero() %}
6 |     {{ return(adapter.dispatch('limit_zero', 'dbt_utils')()) }}
7 | {% endmacro %}
8 |
9 | {% macro default__limit_zero() %}
10 |     {{ return('limit 0') }}
11 | {% endmacro %}
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/jinja_helpers/pretty_log_format.sql:
--------------------------------------------------------------------------------
1 | {% macro pretty_log_format(message) %}
2 |     {{ return(adapter.dispatch('pretty_log_format', 'dbt_utils')(message)) }}
3 | {% endmacro %}
4 |
5 | {% macro default__pretty_log_format(message) %}
6 |     {{ return( dbt_utils.pretty_time() ~ ' + ' ~ message) }}
7 | {% endmacro %}
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/jinja_helpers/pretty_time.sql:
--------------------------------------------------------------------------------
1 | {% macro pretty_time(format='%H:%M:%S') %}
2 |     {{ return(adapter.dispatch('pretty_time', 'dbt_utils')(format)) }}
3 | {% endmacro %}
4 |
5 | {% macro default__pretty_time(format='%H:%M:%S') %}
6 |     {{ return(modules.datetime.datetime.now().strftime(format)) }}
7 | {% endmacro %}
8 |
--------------------------------------------------------------------------------
/dbt/models/marts/fct_marketing_attribution.sql:
--------------------------------------------------------------------------------
1 | with s as (
2 |     select * from {{ ref('stg_sessions') }}
3 | )
4 | select
5 |     session_day,
6 |     source,
7 |     medium,
8 |     coalesce(campaign,'') as campaign,
9 |     sum(converted::int) as conversions,
10 |     avg((not bounced)::int)::float as engagement_rate
11 | from s
12 | group by 1,2,3,4
13 |
--------------------------------------------------------------------------------
/dbt/target/compiled/api_bi_project/models/models.yml/unique_stg_customers_customer_id.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | select
6 |     customer_id as unique_field,
7 |     count(*) as n_records
8 |
9 | from "db"."public_staging"."stg_customers"
10 | where customer_id is not null
11 | group by customer_id
12 | having count(*) > 1
13 |
14 |
15 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv:
--------------------------------------------------------------------------------
1 | subscription_id,valid_from,valid_to
2 | 3,2020-05-06,2020-05-07
3 | 3,2020-05-08,2020-05-10
4 | 3,2020-05-08,2020-05-08
5 | 3,2020-05-12,2020-05-15
6 | 4,2020-06-06,2020-06-07
7 | 4,2020-06-08,2020-06-08
8 | 4,2020-06-09,2020-06-10
9 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_quote_expected.csv:
--------------------------------------------------------------------------------
1 | Customer_Id,Created_At,Prop,Val
2 | 123,"2017-01-01","SEGMENT","tier 1"
3 | 123,"2017-01-01","sTaTuS","active"
4 | 234,"2017-02-01","SEGMENT","tier 3"
5 | 234,"2017-02-01","sTaTuS","active"
6 | 567,"2017-03-01","sTaTuS","churned"
7 | 567,"2017-03-01","SEGMENT","tier 2"
8 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/sql/safe_divide.sql:
--------------------------------------------------------------------------------
1 | {% macro safe_divide(numerator, denominator) -%}
2 |     {{ return(adapter.dispatch('safe_divide', 'dbt_utils')(numerator, denominator)) }}
3 | {%- endmacro %}
4 |
5 | {% macro default__safe_divide(numerator, denominator) %}
6 |     ( {{ numerator }} ) / nullif( ( {{ denominator }} ), 0)
7 | {% endmacro %}
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/sql/groupby.sql:
--------------------------------------------------------------------------------
1 | {%- macro group_by(n) -%}
2 |     {{ return(adapter.dispatch('group_by', 'dbt_utils')(n)) }}
3 | {% endmacro %}
4 |
5 | {%- macro default__group_by(n) -%}
6 |
7 |     group by {% for i in range(1, n + 1) -%}
8 |         {{ i }}{{ ',' if not loop.last }}
9 |     {%- endfor -%}
10 |
11 | {%- endmacro -%}
12 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/macros/tests.sql:
--------------------------------------------------------------------------------
1 |
2 | {% test assert_equal(model, actual, expected) %}
3 | select * from {{ model }} where {{ actual }} != {{ expected }}
4 |
5 | {% endtest %}
6 |
7 |
8 | {% test not_empty_string(model, column_name) %}
9 |
10 | select * from {{ model }} where {{ column_name }} = ''
11 |
12 | {% endtest %}
13 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/macros/jinja_helpers/_is_relation.sql:
--------------------------------------------------------------------------------
1 | {% macro _is_relation(obj, macro) %}
2 |     {%- if not (obj is mapping and obj.get('metadata', {}).get('type', '').endswith('Relation')) -%}
3 |         {%- do exceptions.raise_compiler_error("Macro " ~ macro ~ " expected a Relation but received the value: " ~ obj) -%}
4 |     {%- endif -%}
5 | {% endmacro %}
6 |
--------------------------------------------------------------------------------
/dbt/dbt_packages/dbt_utils/integration_tests/models/generic_tests/recency_time_excluded.sql: -------------------------------------------------------------------------------- 1 | with yesterday_time as ( 2 | select 3 | 1 as col1, 4 | 2 as col2, 5 | {{ dbt.dateadd('day', -1, dbt.current_timestamp()) }} as created_at 6 | ) 7 | 8 | select 9 | col1, 10 | col2, 11 | {{ dbt.date_trunc('day', 'created_at') }} as created_at 12 | from yesterday_time -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/marts/fct_marketing_attribution.sql: -------------------------------------------------------------------------------- 1 | with s as ( 2 | select * from "db"."public_staging"."stg_sessions" 3 | ) 4 | select 5 | session_day, 6 | source, 7 | medium, 8 | coalesce(campaign,'') as campaign, 9 | sum(converted::int) as conversions, 10 | avg((not bounced)::int)::float as engagement_rate 11 | from s 12 | group by 1,2,3,4 -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_unpivot_quote.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ dbt_utils.unpivot( 3 | relation=ref('data_unpivot_quote'), 4 | cast_to=type_string(), 5 | exclude=['Customer_Id', 'Created_At'], 6 | remove=['Name'], 7 | field_name='Prop', 8 | value_name='Val', 9 | quote_identifiers=True, 10 | ) }} 11 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_column_values_where.sql: -------------------------------------------------------------------------------- 1 | {% set column_values = dbt_utils.get_column_values(ref('data_get_column_values_where'), 'field', where="condition = 'left'") %} 2 | 3 | -- Create a relation using the values 4 | {% for val in column_values -%} 5 | select {{ string_literal(val) }} as field {% if not loop.last %}union all{% endif %} 6 | {% endfor %} -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/geo/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_haversine_distance_km 5 | data_tests: 6 | - assert_equal: 7 | actual: actual 8 | expected: expected 9 | - name: test_haversine_distance_mi 10 | data_tests: 11 | - assert_equal: 12 | actual: actual 13 | expected: expected 14 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_relations_by_prefix_and_union.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized = 'table') }} 2 | 3 | -- depends_on: {{ ref('data_events_20180101') }}, {{ ref('data_events_20180102') }}, {{ ref('data_events_20180103') }} 4 | 5 | {% set relations = dbt_utils.get_relations_by_prefix(target.schema, 'data_events_') %} 6 | {{ dbt_utils.union_relations(relations) }} 7 | -------------------------------------------------------------------------------- /dbt/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.customers 3 | ) 4 | select 5 | customer_id, 6 | company_name, 7 | country, 8 | industry, 9 | company_size, 10 | signup_date::timestamp as signup_date, 11 | updated_at::timestamp as 
updated_at, 12 | is_churned::boolean as is_churned, 13 | date_trunc('month', signup_date)::date as signup_month 14 | from src 15 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_stg_customers_country.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select country 13 | from "db"."public_staging"."stg_customers" 14 | where country is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_stg_payments_payment_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select payment_id 13 | from "db"."public_staging"."stg_payments" 14 | where payment_id is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_stg_sessions_session_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select session_id 13 | from "db"."public_staging"."stg_sessions" 14 | where session_id is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_bool_expected.csv: -------------------------------------------------------------------------------- 1 | customer_id,created_at,prop,val 2 | 123,2017-01-01,segment,tier 1 3 | 123,2017-01-01,status,active 4 | 123,2017-01-01,is_updated,true 5 | 234,2017-02-01,segment,tier 3 6 | 234,2017-02-01,status,active 7 | 234,2017-02-01,is_updated,false 8 | 567,2017-03-01,status,churned 9 | 567,2017-03-01,is_updated, 10 | 567,2017-03-01,segment,tier 2 11 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_unpivot_original_api_expected.csv: -------------------------------------------------------------------------------- 1 | customer_id,created_at,field_name,value 2 | 123,2017-01-01,status,active 3 | 123,2017-01-01,segment,tier 1 4 | 234,2017-02-01,status,active 5 | 234,2017-02-01,segment,tier 3 6 | 567,2017-03-01,status,churned 7 | 567,2017-03-01,segment,tier 2 8 | 123,2017-01-01,name,name 1 9 | 234,2017-02-01,name,name 3 10 | 567,2017-03-01,name,name 2 -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.customers 3 | ) 4 | select 5 | customer_id, 6 | company_name, 7 | country, 8 | industry, 9 | company_size, 10 | signup_date::timestamp as signup_date, 11 | updated_at::timestamp as updated_at, 12 | is_churned::boolean as is_churned, 13 | date_trunc('month', signup_date)::date as signup_month 14 | from src 
-------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_stg_customers_customer_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select customer_id 13 | from "db"."public_staging"."stg_customers" 14 | where customer_id is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_fct_revenue_daily_order_day.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select order_day 13 | from "db"."public_analytics"."fct_revenue_daily" 14 | where order_day is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | dbt_packages/ 5 | logs/ 6 | venv/ 7 | __pycache__ 8 | .tox/ 9 | /.pytest_cache/ 10 | 11 | 12 | # Ignore all directories that start with 'env-' and can have any name after 13 | env*/ 14 | 15 | # Do not ignore .env files in any directory and do not ignore .env directories 16 | !.env 17 | !*/.env/ 18 | 19 | # But explicitly ignore test.env files 20 | test.env 21 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_generate_surrogate_key.csv: -------------------------------------------------------------------------------- 1 | column_1,column_2,column_3,expected_column_1_only,expected_all_columns 2 | a,b,c,0cc175b9c0f1b6a831c399e269772661,7b193b3d33184464106f41ddf733783b 3 | a,,c,0cc175b9c0f1b6a831c399e269772661,4f32a73dc87b7bbb7a654d8898d58c7e 4 | ,,c,f14cc5cdce0420f4a5a6b6d9d7b85f39,d9c538b129f1a3ad6ecfe55345c32a05 5 | ,,,f14cc5cdce0420f4a5a6b6d9d7b85f39,2fa5491950d66d153d23cfbcfea4e164 6 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star_no_columns.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select 4 | {{ dbt_utils.star(from=ref('data_star'), except=['field_1', 'field_2', 'field_3']) }} 5 | -- if star() returns `*` or a list of columns, this query will fail because there's no comma between the columns 6 | 1 as canary_column 7 | from {{ ref('data_star') }} 8 | 9 | ) 10 | 11 | select * from data 12 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/not_null_fct_marketing_attribution_session_day.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | 11 | 12 | select session_day 13 | from "db"."public_analytics"."fct_marketing_attribution" 14 | where session_day is null 15 | 16 | 17 | 18 | 19 | ) dbt_internal_test -------------------------------------------------------------------------------- 
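`test_star_no_columns` above exercises the degenerate case where `star()` excludes every column and must return an empty string. The everyday pattern is the inverse: select everything except a few audit columns. A sketch against this project's staging layer, assuming only the `star()` signature used in these tests (the excluded columns are illustrative):

select
    {{ dbt_utils.star(from=ref('stg_payments'), except=['created_at', 'updated_at']) }}
from {{ ref('stg_payments') }}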
/dbt/dbt_packages/dbt_utils/integration_tests/models/web/test_urls.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_urls') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.get_url_parameter('url', 'utm_medium') }} as actual, 10 | medium as expected 11 | 12 | from data 13 | 14 | union all 15 | 16 | select 17 | {{ dbt_utils.get_url_parameter('url', 'utm_source') }} as actual, 18 | source as expected 19 | 20 | from data 21 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/marts/fct_revenue_daily.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | create table "db"."public_analytics"."fct_revenue_daily__dbt_tmp" 6 | 7 | 8 | as 9 | 10 | ( 11 | with p as ( 12 | select * from "db"."public_staging"."stg_payments" 13 | ) 14 | select 15 | order_day, 16 | product, 17 | country, 18 | sum(net_revenue) as net_revenue, 19 | count(*) as orders 20 | from p 21 | group by 1,2,3 22 | ); 23 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_deduplicate.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | source as ( 4 | select * 5 | from {{ ref('data_deduplicate') }} 6 | where user_id = 1 7 | ), 8 | 9 | deduped as ( 10 | 11 | {{ 12 | dbt_utils.deduplicate( 13 | 'source', 14 | partition_by='user_id', 15 | order_by='version desc', 16 | ) | indent 17 | }} 18 | 19 | ) 20 | 21 | select * from deduped 22 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star_prefix_suffix.sql: -------------------------------------------------------------------------------- 1 | {% set prefix_with = 'prefix_' if target.type != 'snowflake' else 'PREFIX_' %} 2 | {% set suffix_with = '_suffix' if target.type != 'snowflake' else '_SUFFIX' %} 3 | 4 | with data as ( 5 | 6 | select 7 | {{ dbt_utils.star(from=ref('data_star'), prefix=prefix_with, suffix=suffix_with) }} 8 | 9 | from {{ ref('data_star') }} 10 | 11 | ) 12 | 13 | select * from data -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/marts/dim_customer.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | create table "db"."public_analytics"."dim_customer__dbt_tmp" 6 | 7 | 8 | as 9 | 10 | ( 11 | with c as ( 12 | select * from "db"."public_staging"."stg_customers" 13 | ) 14 | select 15 | customer_id, 16 | company_name, 17 | country, 18 | industry, 19 | company_size, 20 | signup_date, 21 | is_churned, 22 | signup_month 23 | from c 24 | ); 25 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Show location of local install of dbt 4 | echo $(which dbt) 5 | 6 | # Show version and installed adapters 7 | dbt --version 8 | 9 | # Set the profile 10 | cd integration_tests 11 | export DBT_PROFILES_DIR=. 
12 | 13 | # Show the location of the profiles directory and test the connection 14 | dbt debug --target $1 15 | 16 | dbt deps --target $1 || exit 1 17 | dbt build --target $1 --full-refresh || exit 1 18 | -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/models.yml/accepted_values_stg_payments_0a68bd20fe58431edb2dbf71cff25165.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | with all_values as ( 6 | 7 | select 8 | status as value_field, 9 | count(*) as n_records 10 | 11 | from "db"."public_staging"."stg_payments" 12 | group by status 13 | 14 | ) 15 | 16 | select * 17 | from all_values 18 | where value_field not in ( 19 | 'succeeded','failed','refunded' 20 | ) 21 | 22 | 23 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_generate_surrogate_key.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_generate_surrogate_key') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.generate_surrogate_key(['column_1']) }} as actual_column_1_only, 10 | expected_column_1_only, 11 | {{ dbt_utils.generate_surrogate_key(['column_1', 'column_2', 'column_3']) }} as actual_all_columns_list, 12 | expected_all_columns 13 | 14 | from data 15 | -------------------------------------------------------------------------------- /dbt/package-lock.yml: -------------------------------------------------------------------------------- 1 | 2 | # package-lock.yml is auto-generated by `dbt deps` to lock exact versions and hashes of installed dbt packages. 3 | # It ensures reproducible builds and should not be edited manually—update via `dbt deps` instead. 4 | 5 | packages: 6 | - package: dbt-labs/dbt_utils # pinned dependency 7 | version: 1.3.1 # exact version resolved from packages.yml 8 | sha1_hash: dd1e1feb2d2bbce79e7a255cd309a60e6548df0b # integrity check of packages 9 | -------------------------------------------------------------------------------- /dbt/packages.yml: -------------------------------------------------------------------------------- 1 | # packages.yml lists external dbt packages (macros, models) that this project depends on. 2 | # Running `dbt deps` installs them under `dbt_packages/`, each with its own `dbt_project.yml`. 3 | # These package configs don’t override your main project; they just provide additional features. 
4 | packages: 5 | - package: dbt-labs/dbt_utils # Community-maintained helper macros and tests 6 | version: [">=1.0.0", "<2.0.0"] # Pin to major version 1.x for compatibility 7 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_groupby.sql: -------------------------------------------------------------------------------- 1 | with test_data as ( 2 | 3 | select 4 | 5 | {{ safe_cast("'a'", type_string() )}} as column_1, 6 | {{ safe_cast("'b'", type_string() )}} as column_2 7 | 8 | ), 9 | 10 | grouped as ( 11 | 12 | select 13 | *, 14 | count(*) as total 15 | 16 | from test_data 17 | {{ dbt_utils.group_by(2) }} 18 | 19 | ) 20 | 21 | select * from grouped 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/sql/data_width_bucket.csv: -------------------------------------------------------------------------------- 1 | date_col,amount,num_buckets,min_value,max_value,bucket 2 | 2012-08-01,190000.00,4,200000.0,600000.0,0 3 | 2013-08-01,290000.00,4,200000.0,600000.0,1 4 | 2014-02-01,320000.00,4,200000.0,600000.0,2 5 | 2015-04-01,399999.99,4,200000.0,600000.0,2 6 | 2016-04-01,400000.00,4,200000.0,600000.0,3 7 | 2017-04-01,470000.00,4,200000.0,600000.0,3 8 | 2018-04-01,510000.00,4,200000.0,600000.0,4 9 | 2019-04-01,610000.00,4,200000.0,600000.0,5 10 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/web/data_url_host.csv: -------------------------------------------------------------------------------- 1 | original_url,parsed_url 2 | www.google.co.uk?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer,www.google.co.uk 3 | http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/,witanddelight.com 4 | https://www.nytimes.com/2018/01/01/blog,www.nytimes.com 5 | android-app://m.facebook.com/,m.facebook.com 6 | docs.nytimes.com/2021/01/01/index.js?utm_source=google,docs.nytimes.com 7 | https://m.facebook.com/,m.facebook.com -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/nullcheck.sql: -------------------------------------------------------------------------------- 1 | {% macro nullcheck(cols) %} 2 | {{ return(adapter.dispatch('nullcheck', 'dbt_utils')(cols)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__nullcheck(cols) %} 6 | {%- for col in cols %} 7 | 8 | {% if col.is_string() -%} 9 | 10 | nullif({{col.name}},'') as {{col.name}} 11 | 12 | {%- else -%} 13 | 14 | {{col.name}} 15 | 16 | {%- endif -%} 17 | 18 | {%- if not loop.last -%} , {%- endif -%} 19 | 20 | {%- endfor -%} 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/models.yml/accepted_values_stg_sessions_ff93056935f7dcedf7680c76caed8239.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | with all_values as ( 6 | 7 | select 8 | source as value_field, 9 | count(*) as n_records 10 | 11 | from "db"."public_staging"."stg_sessions" 12 | group by source 13 | 14 | ) 15 | 16 | select * 17 | from all_values 18 | where value_field not in ( 19 | 'google','direct','facebook','linkedin','newsletter','referral','bing' 20 | ) 21 | 22 | 23 | -------------------------------------------------------------------------------- 
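The `accepted_values` SQL above is generated, not handwritten: dbt expands a short YAML declaration into the `all_values` CTE and the `not in` filter, and the `target/run/` variant then wraps it in the `failures` / `should_warn` / `should_error` envelope. A sketch of what the source declaration in models.yml plausibly looks like (that file is not included in this dump, so treat the exact shape as an assumption):

models:
  - name: stg_sessions
    columns:
      - name: source
        data_tests:
          - accepted_values:
              values: ['google', 'direct', 'facebook', 'linkedin', 'newsletter', 'referral', 'bing']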
/dbt/target/run/api_bi_project/models/models.yml/unique_stg_payments_payment_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | select 11 | payment_id as unique_field, 12 | count(*) as n_records 13 | 14 | from "db"."public_staging"."stg_payments" 15 | where payment_id is not null 16 | group by payment_id 17 | having count(*) > 1 18 | 19 | 20 | 21 | 22 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/unique_stg_sessions_session_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | select 11 | session_id as unique_field, 12 | count(*) as n_records 13 | 14 | from "db"."public_staging"."stg_sessions" 15 | where session_id is not null 16 | group by session_id 17 | having count(*) > 1 18 | 19 | 20 | 21 | 22 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_pivot.sql: -------------------------------------------------------------------------------- 1 | 2 | -- TODO: How do we make this work nicely on Snowflake too? 3 | 4 | {% if target.type == 'snowflake' %} 5 | {% set column_values = ['RED', 'BLUE'] %} 6 | {% set cmp = 'ilike' %} 7 | {% else %} 8 | {% set column_values = ['red', 'blue'] %} 9 | {% set cmp = '=' %} 10 | {% endif %} 11 | 12 | select 13 | size, 14 | {{ dbt_utils.pivot('color', column_values, cmp=cmp) }} 15 | 16 | from {{ ref('data_pivot') }} 17 | group by size 18 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/unique_stg_customers_customer_id.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | select 11 | customer_id as unique_field, 12 | count(*) as n_records 13 | 14 | from "db"."public_staging"."stg_customers" 15 | where customer_id is not null 16 | group by customer_id 17 | having count(*) > 1 18 | 19 | 20 | 21 | 22 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/web/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_urls 5 | data_tests: 6 | - assert_equal: 7 | actual: actual 8 | expected: expected 9 | 10 | - name: test_url_host 11 | data_tests: 12 | - assert_equal: 13 | actual: actual 14 | expected: expected 15 | 16 | - name: test_url_path 17 | data_tests: 18 | - assert_equal: 19 | actual: actual 20 | expected: expected -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/web/get_url_parameter.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_parameter(field, url_parameter) -%} 2 | {{ return(adapter.dispatch('get_url_parameter', 'dbt_utils')(field, url_parameter)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_url_parameter(field, url_parameter) -%} 6 | 7 | 
{%- set formatted_url_parameter = "'" + url_parameter + "='" -%} 8 | 9 | {%- set split = dbt.split_part(dbt.split_part(field, formatted_url_parameter, 2), "'&'", 1) -%} 10 | 11 | nullif({{ split }},'') 12 | 13 | {%- endmacro %} 14 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | 2 | create view "db"."public_staging"."stg_customers__dbt_tmp" 3 | 4 | 5 | as ( 6 | with src as ( 7 | select * from raw.customers 8 | ) 9 | select 10 | customer_id, 11 | company_name, 12 | country, 13 | industry, 14 | company_size, 15 | signup_date::timestamp as signup_date, 16 | updated_at::timestamp as updated_at, 17 | is_churned::boolean as is_churned, 18 | date_trunc('month', signup_date)::date as signup_month 19 | from src 20 | ); -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_relations_by_pattern.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized = 'table') }} 2 | 3 | -- depends_on: {{ ref('data_events_20180101') }}, {{ ref('data_events_20180102') }}, {{ ref('data_events_20180103') }} 4 | 5 | {% set relations = dbt_utils.get_relations_by_pattern(target.schema ~ '%', 'data_events_%') %} 6 | 7 | with unioned as ( 8 | 9 | {{ dbt_utils.union_relations(relations) }} 10 | 11 | ) 12 | 13 | select 14 | 15 | user_id, 16 | event 17 | 18 | from unioned 19 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star_quote_identifiers.sql: -------------------------------------------------------------------------------- 1 | select 2 | {{ dbt.string_literal(adapter.quote("column_one")) | lower }} as expected, 3 | {{ dbt.string_literal(dbt_utils.star(from=ref('data_star_quote_identifiers'), quote_identifiers=True)) | trim | lower }} as actual 4 | 5 | union all 6 | 7 | select 8 | {{ dbt.string_literal("column_one") | lower }} as expected, 9 | {{ dbt.string_literal(dbt_utils.star(from=ref('data_star_quote_identifiers'), quote_identifiers=False)) | trim | lower }} as actual -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/marts/fct_marketing_attribution.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | create table "db"."public_analytics"."fct_marketing_attribution__dbt_tmp" 6 | 7 | 8 | as 9 | 10 | ( 11 | with s as ( 12 | select * from "db"."public_staging"."stg_sessions" 13 | ) 14 | select 15 | session_day, 16 | source, 17 | medium, 18 | coalesce(campaign,'') as campaign, 19 | sum(converted::int) as conversions, 20 | avg((not bounced)::int)::float as engagement_rate 21 | from s 22 | group by 1,2,3,4 23 | ); 24 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/web/data_url_path.csv: -------------------------------------------------------------------------------- 1 | original_url,parsed_path 2 | www.google.co.uk?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer, 3 | http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/,2018/01/tips-tricks-how-run-half-marathon-first-time/ 4 | https://www.nytimes.com/2018/01/01/blog,2018/01/01/blog 5 | 
http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer,2018/01/tips-tricks-how-run-half-marathon-first-time/ -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/nullcheck_table.sql: -------------------------------------------------------------------------------- 1 | {% macro nullcheck_table(relation) %} 2 | {{ return(adapter.dispatch('nullcheck_table', 'dbt_utils')(relation)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__nullcheck_table(relation) %} 6 | 7 | {%- do dbt_utils._is_relation(relation, 'nullcheck_table') -%} 8 | {%- do dbt_utils._is_ephemeral(relation, 'nullcheck_table') -%} 9 | {% set cols = adapter.get_columns_in_relation(relation) %} 10 | 11 | select {{ dbt_utils.nullcheck(cols) }} 12 | from {{relation}} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/tests/jinja_helpers/test_slugify.sql: -------------------------------------------------------------------------------- 1 | with comparisons as ( 2 | select '{{ dbt_utils.slugify("") }}' as output, '' as expected 3 | union all 4 | select '{{ dbt_utils.slugify(None) }}' as output, '' as expected 5 | union all 6 | select '{{ dbt_utils.slugify("!Hell0 world-hi") }}' as output, 'hell0_world_hi' as expected 7 | union all 8 | select '{{ dbt_utils.slugify("0Hell0 world-hi") }}' as output, '_0hell0_world_hi' as expected 9 | ) 10 | 11 | select * 12 | from comparisons 13 | where output != expected 14 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_generate_series.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake doesn't like this as a view because the `generate_series` 3 | -- call creates a CTE called `unioned`, as does the `equality` generic test. 4 | -- Ideally, Snowflake would be smart enough to know that these CTE names are 5 | -- different, as they live in different relations. TODO: use a less common cte name 6 | 7 | {{ config(materialized='table') }} 8 | 9 | with data as ( 10 | 11 | {{ dbt_utils.generate_series(10) }} 12 | 13 | ) 14 | 15 | select generated_number from data 16 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_pivot_apostrophe.sql: -------------------------------------------------------------------------------- 1 | 2 | -- TODO: How do we make this work nicely on Snowflake too? 
3 | 4 | {% if target.type == 'snowflake' %} 5 | {% set column_values = ['RED', 'BLUE', "BLUE'S"] %} 6 | {% set cmp = 'ilike' %} 7 | {% else %} 8 | {% set column_values = ['red', 'blue', "blue's"] %} 9 | {% set cmp = '=' %} 10 | {% endif %} 11 | 12 | select 13 | size, 14 | {{ dbt_utils.pivot('color', column_values, cmp=cmp, quote_identifiers=False) }} 15 | 16 | from {{ ref('data_pivot') }} 17 | group by size 18 | -------------------------------------------------------------------------------- /dbt/models/staging/stg_sessions.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.sessions 3 | ) 4 | select 5 | session_id, 6 | nullif(customer_id::text,'')::uuid as customer_id, 7 | lower(source) as source, 8 | lower(medium) as medium, 9 | campaign, 10 | lower(device) as device, 11 | country, 12 | pageviews, 13 | session_duration_s, 14 | (bounced=1) as bounced, 15 | (converted=1) as converted, 16 | session_start::timestamp as session_start, 17 | updated_at::timestamp as updated_at, 18 | date_trunc('day', session_start)::date as session_day 19 | from src 20 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_star_aggregate.sql: -------------------------------------------------------------------------------- 1 | /*This test checks that column aliases aren't applied unless there's a prefix/suffix necessary, to ensure that GROUP BYs keep working*/ 2 | 3 | {% set selected_columns = dbt_utils.star(from=ref('data_star_aggregate'), except=['value_field']) %} 4 | 5 | with data as ( 6 | 7 | select 8 | {{ selected_columns }}, 9 | sum(value_field) as value_field_sum 10 | 11 | from {{ ref('data_star_aggregate') }} 12 | group by {{ selected_columns }} 13 | 14 | ) 15 | 16 | select * from data 17 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_filtered_columns_in_relation.sql: -------------------------------------------------------------------------------- 1 | {% set exclude_field = 'field_1' %} 2 | {% set column_names = dbt_utils.get_filtered_columns_in_relation(from= ref('data_filtered_columns_in_relation'), except=[exclude_field]) %} 3 | 4 | with data as ( 5 | 6 | select 7 | 8 | {% for column_name in column_names %} 9 | max({{ column_name }}) as {{ column_name }} {% if not loop.last %},{% endif %} 10 | {% endfor %} 11 | 12 | from {{ ref('data_filtered_columns_in_relation') }} 13 | 14 | ) 15 | 16 | select * from data 17 | -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/staging/stg_sessions.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.sessions 3 | ) 4 | select 5 | session_id, 6 | nullif(customer_id::text,'')::uuid as customer_id, 7 | lower(source) as source, 8 | lower(medium) as medium, 9 | campaign, 10 | lower(device) as device, 11 | country, 12 | pageviews, 13 | session_duration_s, 14 | (bounced=1) as bounced, 15 | (converted=1) as converted, 16 | session_start::timestamp as session_start, 17 | updated_at::timestamp as updated_at, 18 | date_trunc('day', session_start)::date as session_day 19 | from src -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/docs/decisions/README.md: 
-------------------------------------------------------------------------------- 1 | ## ADRs 2 | 3 | For any architectural/engineering decisions we make, we will create an [ADR (Architectural Decision Record)](https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions) to keep track of what decision we made and why. This allows us to refer back to decisions in the future and see if the reasons we made a choice still hold true. This also allows for others to more easily understand the code. ADRs will follow this process (or its replacement): 4 | - [adr-0000-documenting-architecture-decisions.md](adr-0000-documenting-architecture-decisions.md) 5 | -------------------------------------------------------------------------------- /dbt/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.payments 3 | ), success as ( 4 | select *, (amount - coalesce(fee,0) - coalesce(refunded_amount,0))::numeric as net_revenue 5 | from src 6 | where status = 'succeeded' 7 | ) 8 | select 9 | payment_id, 10 | customer_id, 11 | product, 12 | amount, 13 | currency, 14 | status, 15 | refunded_amount, 16 | fee, 17 | payment_method, 18 | country, 19 | created_at::timestamp as created_at, 20 | updated_at::timestamp as updated_at, 21 | net_revenue, 22 | date_trunc('day', created_at)::date as order_day 23 | from success 24 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_floats_a.csv: -------------------------------------------------------------------------------- 1 | id,float_number 2 | 05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186 3 | cfae9054-940b-42a1-84d4-052daae6194f,81.2511656 4 | 6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675 5 | c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841 6 | 59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434 7 | b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425 8 | 26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223 9 | b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680 10 | 11c979b7-2661-4375-8143-7c9b54b90627,19.5755431 11 | a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047 12 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_floats_b.csv: -------------------------------------------------------------------------------- 1 | id,float_number 2 | 05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888187 3 | cfae9054-940b-42a1-84d4-052daae6194f,81.2511657 4 | 6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959676 5 | c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100842 6 | 59caed0d-53d6-473c-a88c-3726c7693f05,68.6029435 7 | b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861426 8 | 26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662224 9 | b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524681 10 | 11c979b7-2661-4375-8143-7c9b54b90627,19.5755432 11 | a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237048 12 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/tests/sql/test_get_single_value_multiple_rows.sql: -------------------------------------------------------------------------------- 1 | {% set query %} 2 | with input as ( 3 | select 1 as id, 4 as di 4 | union all 5 | select 2 as id, 5 as di 6 | union all 7 | select 3 as id, 6 as di 8 | ) 9 | {% endset %} 10 | 11 | with comparisons as ( 12 | select {{ dbt_utils.get_single_value(query
~ " select min(id) from input") }} as output, 1 as expected 13 | union all 14 | select {{ dbt_utils.get_single_value(query ~ " select max(di) from input") }} as output, 6 as expected 15 | ) 16 | select * 17 | from comparisons 18 | where output != expected -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_tables_by_prefix_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %} 2 | {{ return(adapter.dispatch('get_tables_by_prefix_sql', 'dbt_utils')(schema, prefix, exclude, database)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %} 6 | 7 | {{ dbt_utils.get_tables_by_pattern_sql( 8 | schema_pattern = schema, 9 | table_pattern = prefix ~ '%', 10 | exclude = exclude, 11 | database = database 12 | ) }} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /dbt/target/compiled/api_bi_project/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with src as ( 2 | select * from raw.payments 3 | ), success as ( 4 | select *, (amount - coalesce(fee,0) - coalesce(refunded_amount,0))::numeric as net_revenue 5 | from src 6 | where status = 'succeeded' 7 | ) 8 | select 9 | payment_id, 10 | customer_id, 11 | product, 12 | amount, 13 | currency, 14 | status, 15 | refunded_amount, 16 | fee, 17 | payment_method, 18 | country, 19 | created_at::timestamp as created_at, 20 | updated_at::timestamp as updated_at, 21 | net_revenue, 22 | date_trunc('day', created_at)::date as order_day 23 | from success -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-dotenv 3 | dbt-core@git+https://github.com/dbt-labs/dbt-core.git#subdirectory=core 4 | dbt-tests-adapter@git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter 5 | dbt-postgres@git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-postgres 6 | dbt-redshift@git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-redshift 7 | dbt-snowflake@git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-snowflake 8 | dbt-bigquery@git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-bigquery 9 | pytest-xdist 10 | tox>=3.13 11 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/jinja_helpers/slugify.sql: -------------------------------------------------------------------------------- 1 | {% macro slugify(string) %} 2 | 3 | {% if not string %} 4 | {{ return('') }} 5 | {% endif %} 6 | 7 | {#- Lower case the string -#} 8 | {% set string = string | lower %} 9 | {#- Replace spaces and dashes with underscores -#} 10 | {% set string = modules.re.sub('[ -]+', '_', string) %} 11 | {#- Only take letters, numbers, and underscores -#} 12 | {% set string = modules.re.sub('[^a-z0-9_]+', '', string) %} 13 | {#- Prepends "_" if string begins with a number -#} 14 | {% set string = modules.re.sub('^[0-9]', '_' + string[0], string) %} 15 | 16 | {{ return(string) }} 17 | 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- 
/dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | seeds: 4 | - name: data_test_sequential_values 5 | columns: 6 | - name: my_even_sequence 7 | data_tests: 8 | - dbt_utils.sequential_values: 9 | interval: 2 10 | - dbt_utils.sequential_values: 11 | interval: 2 12 | group_by_columns: ['col_a'] 13 | 14 | 15 | - name: data_test_sequential_timestamps 16 | columns: 17 | - name: my_timestamp 18 | data_tests: 19 | - dbt_utils.sequential_values: 20 | interval: 1 21 | datepart: 'hour' 22 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/expression_is_true.sql: -------------------------------------------------------------------------------- 1 | {% test expression_is_true(model, expression, column_name=None) %} 2 | {{ return(adapter.dispatch('test_expression_is_true', 'dbt_utils')(model, expression, column_name)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_expression_is_true(model, expression, column_name) %} 6 | 7 | {% set column_list = '*' if should_store_failures() else "1" %} 8 | 9 | select 10 | {{ column_list }} 11 | from {{ model }} 12 | {% if column_name is none %} 13 | where not({{ expression }}) 14 | {%- else %} 15 | where not({{ column_name }} {{ expression }}) 16 | {%- endif %} 17 | 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/accepted_values_stg_payments_0a68bd20fe58431edb2dbf71cff25165.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | with all_values as ( 11 | 12 | select 13 | status as value_field, 14 | count(*) as n_records 15 | 16 | from "db"."public_staging"."stg_payments" 17 | group by status 18 | 19 | ) 20 | 21 | select * 22 | from all_values 23 | where value_field not in ( 24 | 'succeeded','failed','refunded' 25 | ) 26 | 27 | 28 | 29 | 30 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_floats_columns_a.csv: -------------------------------------------------------------------------------- 1 | id,float_number,to_ignore 2 | 05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,a 3 | cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,a 4 | 6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,a 5 | c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,a 6 | 59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,a 7 | b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,a 8 | 26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,a 9 | b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,a 10 | 11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,a 11 | a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,a 12 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/data/schema_tests/data_test_equality_floats_columns_b.csv: -------------------------------------------------------------------------------- 1 | id,float_number,to_ignore 2 | 05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,b 3 | cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,b 4 | 6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,b 5 | 
c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,b 6 | 59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,b 7 | b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,b 8 | 26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,b 9 | b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,b 10 | 11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,b 11 | a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,b 12 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/geo/test_haversine_distance_km.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | select * from {{ ref('data_haversine_km') }} 3 | ), 4 | final as ( 5 | select 6 | output as expected, 7 | cast( 8 | {{ 9 | dbt_utils.haversine_distance( 10 | lat1='lat_1', 11 | lon1='lon_1', 12 | lat2='lat_2', 13 | lon2='lon_2', 14 | unit='km' 15 | ) 16 | }} as {{ type_numeric() }} 17 | ) as actual 18 | from data 19 | ) 20 | select 21 | expected, 22 | round(actual,0) as actual 23 | from final 24 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/models.yml/accepted_values_stg_sessions_ff93056935f7dcedf7680c76caed8239.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(*) as failures, 3 | count(*) != 0 as should_warn, 4 | count(*) != 0 as should_error 5 | from ( 6 | 7 | 8 | 9 | 10 | with all_values as ( 11 | 12 | select 13 | source as value_field, 14 | count(*) as n_records 15 | 16 | from "db"."public_staging"."stg_sessions" 17 | group by source 18 | 19 | ) 20 | 21 | select * 22 | from all_values 23 | where value_field not in ( 24 | 'google','direct','facebook','linkedin','newsletter','referral','bing' 25 | ) 26 | 27 | 28 | 29 | 30 | ) dbt_internal_test -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/macros/assert_equal_values.sql: -------------------------------------------------------------------------------- 1 | {% macro assert_equal_values(actual_object, expected_object) %} 2 | {% if not execute %} 3 | 4 | {# pass #} 5 | 6 | {% elif actual_object != expected_object %} 7 | 8 | {% set msg %} 9 | Expected did not match actual 10 | 11 | ----------- 12 | Actual: 13 | ----------- 14 | --->{{ actual_object }}<--- 15 | 16 | ----------- 17 | Expected: 18 | ----------- 19 | --->{{ expected_object }}<--- 20 | 21 | {% endset %} 22 | 23 | {{ log(msg, info=True) }} 24 | 25 | select 'fail' 26 | 27 | {% else %} 28 | 29 | select 'ok' {{ limit_zero() }} 30 | 31 | {% endif %} 32 | {% endmacro %} -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/staging/stg_sessions.sql: -------------------------------------------------------------------------------- 1 | 2 | create view "db"."public_staging"."stg_sessions__dbt_tmp" 3 | 4 | 5 | as ( 6 | with src as ( 7 | select * from raw.sessions 8 | ) 9 | select 10 | session_id, 11 | nullif(customer_id::text,'')::uuid as customer_id, 12 | lower(source) as source, 13 | lower(medium) as medium, 14 | campaign, 15 | lower(device) as device, 16 | country, 17 | pageviews, 18 | session_duration_s, 19 | (bounced=1) as bounced, 20 | (converted=1) as converted, 21 | session_start::timestamp as session_start, 22 | updated_at::timestamp as updated_at, 23 | date_trunc('day', session_start)::date as session_day 24 | from src 25 | ); 
-------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/web/get_url_host.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_host(field) -%} 2 | {{ return(adapter.dispatch('get_url_host', 'dbt_utils')(field)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_url_host(field) -%} 6 | 7 | {%- set parsed = 8 | dbt.split_part( 9 | dbt.split_part( 10 | dbt.replace( 11 | dbt.replace( 12 | dbt.replace(field, "'android-app://'", "''" 13 | ), "'http://'", "''" 14 | ), "'https://'", "''" 15 | ), "'/'", 1 16 | ), "'?'", 1 17 | ) 18 | 19 | -%} 20 | 21 | 22 | {{ dbt.safe_cast( 23 | parsed, 24 | dbt.type_string() 25 | )}} 26 | 27 | {%- endmacro %} 28 | -------------------------------------------------------------------------------- /dbt/target/run/api_bi_project/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | 2 | create view "db"."public_staging"."stg_payments__dbt_tmp" 3 | 4 | 5 | as ( 6 | with src as ( 7 | select * from raw.payments 8 | ), success as ( 9 | select *, (amount - coalesce(fee,0) - coalesce(refunded_amount,0))::numeric as net_revenue 10 | from src 11 | where status = 'succeeded' 12 | ) 13 | select 14 | payment_id, 15 | customer_id, 16 | product, 17 | amount, 18 | currency, 19 | status, 20 | refunded_amount, 21 | fee, 22 | payment_method, 23 | country, 24 | created_at::timestamp as created_at, 25 | updated_at::timestamp as updated_at, 26 | net_revenue, 27 | date_trunc('day', created_at)::date as order_day 28 | from success 29 | ); -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/jinja_helpers/_is_ephemeral.sql: -------------------------------------------------------------------------------- 1 | {% macro _is_ephemeral(obj, macro) %} 2 | {%- if obj.is_cte -%} 3 | {% set ephemeral_prefix = api.Relation.add_ephemeral_prefix('') %} 4 | {% if obj.name.startswith(ephemeral_prefix) %} 5 | {% set model_name = obj.name[(ephemeral_prefix|length):] %} 6 | {% else %} 7 | {% set model_name = obj.name %} 8 | {%- endif -%} 9 | {% set error_message %} 10 | The `{{ macro }}` macro cannot be used with ephemeral models, as it relies on the information schema. 11 | 12 | `{{ model_name }}` is an ephemeral model. Consider making it a view or table instead. 
13 | {% endset %} 14 | {%- do exceptions.raise_compiler_error(error_message) -%} 15 | {%- endif -%} 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_nullcheck_table.sql: -------------------------------------------------------------------------------- 1 | {{ config( materialized = "table" ) }} 2 | 3 | -- TO DO: remove if-statement 4 | 5 | {% set tbl = ref('data_nullcheck_table') %} 6 | 7 | 8 | with nulled as ( 9 | 10 | {{ dbt_utils.nullcheck_table(tbl) }} 11 | 12 | ) 13 | 14 | {% if target.type == 'snowflake' %} 15 | 16 | select 17 | field_1::varchar as field_1, 18 | field_2::varchar as field_2, 19 | field_3::varchar as field_3 20 | 21 | from nulled 22 | 23 | {% else %} 24 | 25 | select 26 | 27 | {{ safe_cast('field_1', 28 | type_string() 29 | )}} as field_1, 30 | 31 | {{ safe_cast('field_2', 32 | type_string() 33 | )}} as field_2, 34 | 35 | {{ safe_cast('field_3', 36 | type_string() 37 | )}} as field_3 38 | 39 | from nulled 40 | 41 | {% endif %} 42 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL:=help 2 | 3 | .PHONY: test 4 | test: ## Run the integration tests. 5 | @\ 6 | tox -e dbt_integration_$(target) 7 | 8 | .PHONY: dev 9 | dev: ## Installs dbt-* packages in develop mode along with development dependencies. 10 | @\ 11 | echo "Install dbt-$(target)..."; \ 12 | pip install --upgrade pip setuptools; \ 13 | pip install --pre "dbt-$(target)" -r dev-requirements.txt; 14 | 15 | .PHONY: setup-db 16 | setup-db: ## Setup Postgres database with docker-compose for system testing. 17 | @\ 18 | docker-compose up --detach postgres 19 | 20 | .PHONY: help 21 | help: ## Show this help message. 22 | @echo 'usage: make [target]' 23 | @echo 24 | @echo 'targets:' 25 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 26 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/safe_add.sql: -------------------------------------------------------------------------------- 1 | {%- macro safe_add(field_list) -%} 2 | {{ return(adapter.dispatch('safe_add', 'dbt_utils')(field_list)) }} 3 | {% endmacro %} 4 | 5 | {%- macro default__safe_add(field_list) -%} 6 | 7 | {%- if field_list is not iterable or field_list is string or field_list is mapping -%} 8 | 9 | {%- set error_message = ' 10 | Warning: the `safe_add` macro now takes a single list argument instead of \ 11 | string arguments. The {}.{} model triggered this warning.
\ 12 | '.format(model.package_name, model.name) -%} 13 | 14 | {%- do exceptions.warn(error_message) -%} 15 | 16 | {%- endif -%} 17 | 18 | {% set fields = [] %} 19 | 20 | {%- for field in field_list -%} 21 | 22 | {% do fields.append("coalesce(" ~ field ~ ", 0)") %} 23 | 24 | {%- endfor -%} 25 | 26 | {{ fields|join(' +\n ') }} 27 | 28 | {%- endmacro -%} 29 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_column_values.sql: -------------------------------------------------------------------------------- 1 | 2 | {% set column_values = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default=[], order_by="field") %} 3 | 4 | 5 | {% if target.type == 'snowflake' %} 6 | 7 | select 8 | {% for val in column_values -%} 9 | 10 | sum(case when field = '{{ val }}' then 1 else 0 end) as count_{{ val }} 11 | {%- if not loop.last %},{% endif -%} 12 | 13 | {%- endfor %} 14 | 15 | from {{ ref('data_get_column_values') }} 16 | 17 | {% else %} 18 | 19 | select 20 | {% for val in column_values -%} 21 | 22 | {{ safe_cast("sum(case when field = '" ~ val ~ "' then 1 else 0 end)", type_string()) }} as count_{{ val }} 23 | {%- if not loop.last %},{% endif -%} 24 | 25 | {%- endfor %} 26 | 27 | from {{ ref('data_get_column_values') }} 28 | 29 | {% endif %} 30 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/safe_subtract.sql: -------------------------------------------------------------------------------- 1 | {%- macro safe_subtract(field_list) -%} 2 | {{ return(adapter.dispatch('safe_subtract', 'dbt_utils')(field_list)) }} 3 | {% endmacro %} 4 | 5 | {%- macro default__safe_subtract(field_list) -%} 6 | 7 | {%- if field_list is not iterable or field_list is string or field_list is mapping -%} 8 | 9 | {%- set error_message = ' 10 | Warning: the `safe_subtract` macro takes a single list argument instead of \ 11 | string arguments. The {}.{} model triggered this warning. \ 12 | '.format(model.package_name, model.name) -%} 13 | 14 | {%- do exceptions.raise_compiler_error(error_message) -%} 15 | 16 | {%- endif -%} 17 | 18 | {% set fields = [] %} 19 | 20 | {%- for field in field_list -%} 21 | 22 | {% do fields.append("coalesce(" ~ field ~ ", 0)") %} 23 | 24 | {%- endfor -%} 25 | 26 | {{ fields|join(' -\n ') }} 27 | 28 | {%- endmacro -%} 29 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/surrogate_key.sql: -------------------------------------------------------------------------------- 1 | {%- macro surrogate_key(field_list) -%} 2 | {% set frustrating_jinja_feature = varargs %} 3 | {{ return(adapter.dispatch('surrogate_key', 'dbt_utils')(field_list, *varargs)) }} 4 | {% endmacro %} 5 | 6 | {%- macro default__surrogate_key(field_list) -%} 7 | 8 | {%- set error_message = ' 9 | Warning: `dbt_utils.surrogate_key` has been replaced by \ 10 | `dbt_utils.generate_surrogate_key`. The new macro treats null values \ 11 | differently to empty strings. To restore the behaviour of the original \ 12 | macro, add a global variable in dbt_project.yml called \ 13 | `surrogate_key_treat_nulls_as_empty_strings` to your \ 14 | dbt_project.yml file with a value of True. \ 15 | The {}.{} model triggered this warning. 
\ 16 | '.format(model.package_name, model.name) -%} 17 | 18 | {%- do exceptions.raise_compiler_error(error_message) -%} 19 | 20 | {%- endmacro -%} 21 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/not_constant.sql: -------------------------------------------------------------------------------- 1 | 2 | {% test not_constant(model, column_name, group_by_columns = []) %} 3 | {{ return(adapter.dispatch('test_not_constant', 'dbt_utils')(model, column_name, group_by_columns)) }} 4 | {% endtest %} 5 | 6 | {% macro default__test_not_constant(model, column_name, group_by_columns) %} 7 | 8 | {% if group_by_columns|length() > 0 %} 9 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 10 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 11 | {% endif %} 12 | 13 | 14 | select 15 | {# In TSQL, subquery aggregate columns need aliases #} 16 | {# thus: a filler col name, 'filler_column' #} 17 | {{select_gb_cols}} 18 | count(distinct {{ column_name }}) as filler_column 19 | 20 | from {{ model }} 21 | 22 | {{groupby_gb_cols}} 23 | 24 | having count(distinct {{ column_name }}) = 1 25 | 26 | 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/not_empty_string.sql: -------------------------------------------------------------------------------- 1 | {% test not_empty_string(model, column_name, trim_whitespace=true) %} 2 | 3 | {{ return(adapter.dispatch('test_not_empty_string', 'dbt_utils')(model, column_name, trim_whitespace)) }} 4 | 5 | {% endtest %} 6 | 7 | {% macro default__test_not_empty_string(model, column_name, trim_whitespace=true) %} 8 | 9 | with 10 | 11 | all_values as ( 12 | 13 | select 14 | 15 | 16 | {% if trim_whitespace == true -%} 17 | 18 | trim({{ column_name }}) as {{ column_name }} 19 | 20 | {%- else -%} 21 | 22 | {{ column_name }} 23 | 24 | {%- endif %} 25 | 26 | from {{ model }} 27 | 28 | ), 29 | 30 | errors as ( 31 | 32 | select * from all_values 33 | where {{ column_name }} = '' 34 | 35 | ) 36 | 37 | select * from errors 38 | 39 | {% endmacro %} -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_query_results_as_dict.sql: -------------------------------------------------------------------------------- 1 | {% macro get_query_results_as_dict(query) %} 2 | {{ return(adapter.dispatch('get_query_results_as_dict', 'dbt_utils')(query)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_query_results_as_dict(query) %} 6 | 7 | {# This macro returns a dictionary of the form {column_name: (tuple_of_results)} #} 8 | 9 | {%- call statement('get_query_results', fetch_result=True,auto_begin=false) -%} 10 | 11 | {{ query }} 12 | 13 | {%- endcall -%} 14 | 15 | {% set sql_results={} %} 16 | 17 | {%- if execute -%} 18 | {% set sql_results_table = load_result('get_query_results').table.columns %} 19 | {% for column_name, column in sql_results_table.items() %} 20 | {% do sql_results.update({column_name: column.values()}) %} 21 | {% endfor %} 22 | {%- endif -%} 23 | 24 | {{ return(sql_results) }} 25 | 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/generate_surrogate_key.sql: -------------------------------------------------------------------------------- 1 | {%- macro generate_surrogate_key(field_list) -%} 2 | {{ 
return(adapter.dispatch('generate_surrogate_key', 'dbt_utils')(field_list)) }} 3 | {% endmacro %} 4 | 5 | {%- macro default__generate_surrogate_key(field_list) -%} 6 | 7 | {%- if var('surrogate_key_treat_nulls_as_empty_strings', False) -%} 8 | {%- set default_null_value = "" -%} 9 | {%- else -%} 10 | {%- set default_null_value = '_dbt_utils_surrogate_key_null_' -%} 11 | {%- endif -%} 12 | 13 | {%- set fields = [] -%} 14 | 15 | {%- for field in field_list -%} 16 | 17 | {%- do fields.append( 18 | "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')" 19 | ) -%} 20 | 21 | {%- if not loop.last %} 22 | {%- do fields.append("'-'") -%} 23 | {%- endif -%} 24 | 25 | {%- endfor -%} 26 | 27 | {{ dbt.hash(dbt.concat(fields)) }} 28 | 29 | {%- endmacro -%} 30 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # For issues that have been open for awhile without activity, label 3 | # them as stale with a warning that they will be closed out. If 4 | # anyone comments to keep the issue open, it will automatically 5 | # remove the stale label and keep it open. 6 | 7 | # Stale label rules: 8 | # awaiting_response, more_information_needed -> 90 days 9 | # good_first_issue, help_wanted -> 360 days (a year) 10 | # tech_debt -> 720 (2 years) 11 | # all else defaults -> 180 days (6 months) 12 | 13 | # **why?** 14 | # To keep the repo in a clean state from issues that aren't relevant anymore 15 | 16 | # **when?** 17 | # Once a day 18 | 19 | name: "Close stale issues and PRs" 20 | on: 21 | schedule: 22 | - cron: "30 1 * * *" 23 | 24 | permissions: 25 | issues: write 26 | pull-requests: write 27 | 28 | jobs: 29 | stale: 30 | uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main 31 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the feature 11 | A clear and concise description of what you want to happen. 12 | 13 | ### Describe alternatives you've considered 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | ### Additional context 17 | Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here. 18 | 19 | ### Who will this benefit? 20 | What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. 21 | 22 | ### Are you interested in contributing this feature? 
23 | 26 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_not_empty_string_failing.sql: -------------------------------------------------------------------------------- 1 | -- dbt seed casts '' as NULL, so we need to select empty strings to enable testing 2 | 3 | with blank_data as ( 4 | 5 | select 6 | 1 as id, 7 | 'not an empty string' as string_trim_whitespace_true 8 | 9 | union all 10 | 11 | select 12 | 2 as id, 13 | 'also not an empty string' as string_trim_whitespace_true 14 | 15 | union all 16 | 17 | select 18 | 3 as id, 19 | 'string with trailing whitespace ' as string_trim_whitespace_true 20 | 21 | union all 22 | 23 | select 24 | 4 as id, 25 | ' ' as string_trim_whitespace_true 26 | 27 | union all 28 | 29 | select 30 | 5 as id, 31 | '' as string_trim_whitespace_true 32 | 33 | union all 34 | 35 | select 36 | 6 as id, 37 | null as string_trim_whitespace_true 38 | 39 | ) 40 | 41 | select * from blank_data -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/workflows/create-table-of-contents.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Update table of contents 4 | 5 | # Controls when the workflow will run 6 | 7 | # Never! 8 | on: [] 9 | 10 | # Disabled by Doug Beatty on 2024-04-25 to fix CI 11 | # https://github.com/dbt-labs/dbt-utils/issues/885 12 | # on: 13 | # push: 14 | # branches: [main] 15 | # paths: ['README.md'] 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | timeout-minutes: 5 21 | steps: 22 | - uses: actions/checkout@v3 23 | - run: | 24 | curl https://raw.githubusercontent.com/ekalinin/github-markdown-toc/master/gh-md-toc -o gh-md-toc 25 | chmod a+x gh-md-toc 26 | ./gh-md-toc --insert --no-backup README.md 27 | rm ./gh-md-toc 28 | - uses: stefanzweifel/git-auto-commit-action@v4 29 | with: 30 | commit_message: Auto update table of contents 31 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/datetime/test_date_spine.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake doesn't like this as a view because the `generate_series` 3 | -- call creates a CTE called `unioned`, as does the `equality` generic test. 4 | -- Ideally, Snowflake would be smart enough to know that these CTE names are 5 | -- different, as they live in different relations. 
TODO: use a less common cte name 6 | 7 | {{ config(materialized='table') }} 8 | 9 | with date_spine as ( 10 | 11 | {% if target.type == 'postgres' %} 12 | {{ dbt_utils.date_spine("day", "'2018-01-01'::date", "'2018-01-10'::date") }} 13 | 14 | {% elif target.type == 'bigquery' %} 15 | select cast(date_day as date) as date_day 16 | from ({{ dbt_utils.date_spine("day", "'2018-01-01'", "'2018-01-10'") }}) 17 | 18 | {% else %} 19 | {{ dbt_utils.date_spine("day", "'2018-01-01'", "'2018-01-10'") }} 20 | {% endif %} 21 | 22 | ) 23 | 24 | select date_day 25 | from date_spine 26 | 27 | -------------------------------------------------------------------------------- /dbt/models/models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: stg_customers 4 | columns: 5 | - name: customer_id 6 | tests: [not_null, unique] 7 | - name: country 8 | tests: [not_null] 9 | 10 | - name: stg_payments 11 | columns: 12 | - name: payment_id 13 | tests: [not_null, unique] 14 | - name: status 15 | tests: 16 | - accepted_values: 17 | values: [succeeded, failed, refunded] 18 | 19 | - name: stg_sessions 20 | columns: 21 | - name: session_id 22 | tests: [not_null, unique] 23 | - name: source 24 | tests: 25 | - accepted_values: 26 | values: [google, direct, facebook, linkedin, newsletter, referral, bing] 27 | 28 | - name: fct_revenue_daily 29 | columns: 30 | - name: order_day 31 | tests: [not_null] 32 | - name: fct_marketing_attribution 33 | columns: 34 | - name: session_day 35 | tests: [not_null] 36 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/web/get_url_path.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_path(field) -%} 2 | {{ return(adapter.dispatch('get_url_path', 'dbt_utils')(field)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_url_path(field) -%} 6 | 7 | {%- set stripped_url = 8 | dbt.replace( 9 | dbt.replace(field, "'http://'", "''"), "'https://'", "''") 10 | -%} 11 | 12 | {%- set first_slash_pos -%} 13 | coalesce( 14 | nullif({{ dbt.position("'/'", stripped_url) }}, 0), 15 | {{ dbt.position("'?'", stripped_url) }} - 1 16 | ) 17 | {%- endset -%} 18 | 19 | {%- set parsed_path = 20 | dbt.split_part( 21 | dbt.right( 22 | stripped_url, 23 | dbt.length(stripped_url) ~ "-" ~ first_slash_pos 24 | ), 25 | "'?'", 1 26 | ) 27 | -%} 28 | 29 | {{ dbt.safe_cast( 30 | parsed_path, 31 | dbt.type_string() 32 | )}} 33 | 34 | {%- endmacro %} 35 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/not_accepted_values.sql: -------------------------------------------------------------------------------- 1 | {% test not_accepted_values(model, column_name, values, quote=True) %} 2 | {{ return(adapter.dispatch('test_not_accepted_values', 'dbt_utils')(model, column_name, values, quote)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_not_accepted_values(model, column_name, values, quote=True) %} 6 | with all_values as ( 7 | 8 | select distinct 9 | {{ column_name }} as value_field 10 | 11 | from {{ model }} 12 | 13 | ), 14 | 15 | validation_errors as ( 16 | 17 | select 18 | value_field 19 | 20 | from all_values 21 | where value_field in ( 22 | {% for value in values -%} 23 | {% if quote -%} 24 | '{{ value }}' 25 | {%- else -%} 26 | {{ value }} 27 | {%- endif -%} 28 | {%- if not loop.last -%},{%- endif %} 29 | {%- endfor %} 30 | ) 31 | 32 | ) 33 | 34 | 
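{# A hypothetical schema.yml usage sketch (column name and values invented for
   illustration, not part of this package):

     columns:
       - name: status
         tests:
           - dbt_utils.not_accepted_values:
               values: ['deprecated', 'unknown']

   Any row whose value appears in `values` is returned by the final select
   below, which makes the test fail. #}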
select * 35 | from validation_errors 36 | 37 | {% endmacro %} 38 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_unpivot_bool.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake messes with these tests pretty badly since the 3 | -- output of the macro considers the casing of the source 4 | -- table columns. Using some hacks here to get this to work, 5 | -- but we should consider lowercasing the unpivot macro output 6 | -- at some point in the future for consistency 7 | 8 | {% if target.name == 'snowflake' %} 9 | {% set exclude = ['CUSTOMER_ID', 'CREATED_AT'] %} 10 | {% else %} 11 | {% set exclude = ['customer_id', 'created_at'] %} 12 | {% endif %} 13 | 14 | 15 | select 16 | customer_id, 17 | created_at, 18 | case 19 | when '{{ target.name }}' = 'snowflake' then lower(prop) 20 | else prop 21 | end as prop, 22 | val 23 | 24 | from ( 25 | {{ dbt_utils.unpivot( 26 | relation=ref('data_unpivot_bool'), 27 | cast_to=type_string(), 28 | exclude=exclude, 29 | field_name='prop', 30 | value_name='val' 31 | ) }} 32 | ) as sbq 33 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_safe_divide.sql: -------------------------------------------------------------------------------- 1 | 2 | with data_safe_divide as ( 3 | 4 | select * from {{ ref('data_safe_divide') }} 5 | 6 | ), 7 | 8 | data_safe_divide_numerator_expressions as ( 9 | 10 | select * from {{ ref('data_safe_divide_numerator_expressions') }} 11 | ), 12 | 13 | data_safe_divide_denominator_expressions as ( 14 | 15 | select * from {{ ref('data_safe_divide_denominator_expressions') }} 16 | ) 17 | 18 | select 19 | {{ dbt_utils.safe_divide('numerator', 'denominator') }} as actual, 20 | output as expected 21 | 22 | from data_safe_divide 23 | 24 | union all 25 | 26 | select 27 | {{ dbt_utils.safe_divide('numerator_1 * numerator_2', 'denominator') }} as actual, 28 | output as expected 29 | 30 | from data_safe_divide_numerator_expressions 31 | 32 | union all 33 | 34 | select 35 | {{ dbt_utils.safe_divide('numerator', 'denominator_1 * denominator_2') }} as actual, 36 | output as expected 37 | 38 | from data_safe_divide_denominator_expressions -------------------------------------------------------------------------------- /dbt/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # dbt_project.yml is the central configuration file for this dbt project. 3 | # It sets metadata (name, version), references the profile in `profiles.yml` 4 | # for connections, and defines paths and defaults for models, seeds, tests, etc. 5 | 6 | # You may also see additional dbt_project.yml files under `dbt/dbt_packages/`. 7 | # Each package (e.g., `dbt_utils`) is a self‑contained dbt project, so it ships 8 | # its own dbt_project.yml. Those files configure the package’s models and macros 9 | # and don’t override settings in this main project. 
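# As a sketch of how the folder-level `+schema` configs below resolve
# (assuming dbt's default generate_schema_name behavior and the `public`
# base schema set in profiles.yml):
#   models/staging/stg_customers.sql   -> public_staging.stg_customers (view)
#   models/marts/fct_revenue_daily.sql -> public_analytics.fct_revenue_daily (table)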
10 | 11 | name: api_bi_project 12 | version: 1.0.0 13 | config-version: 2 14 | profile: api_bi_profile 15 | model-paths: ["models"] 16 | 17 | models: 18 | api_bi_project: # must equal `name:` above 19 | staging: # must be a folder: models/staging/* 20 | +materialized: view 21 | +schema: staging 22 | marts: # must be a folder: models/marts/* 23 | +materialized: table 24 | +schema: analytics 25 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_not_empty_string_passing.sql: -------------------------------------------------------------------------------- 1 | -- dbt seed casts '' as NULL, so we need to select empty strings to enable testing 2 | 3 | with blank_data as ( 4 | 5 | select 6 | 1 as id, 7 | 'not an empty string' as string_trim_whitespace_true, 8 | 'not an empty string' as string_trim_whitespace_false 9 | 10 | union all 11 | 12 | select 13 | 2 as id, 14 | 'also not an empty string' as string_trim_whitespace_true, 15 | 'also not an empty string' as string_trim_whitespace_false 16 | 17 | union all 18 | 19 | select 20 | 3 as id, 21 | 'string with trailing whitespace ' as string_trim_whitespace_true, 22 | ' ' as string_trim_whitespace_false -- This will cause a failure when trim_whitespace = true 23 | 24 | union all 25 | 26 | select 27 | 6 as id, 28 | null as string_trim_whitespace_true, 29 | null as string_trim_whitespace_false 30 | 31 | ) 32 | 33 | select * from blank_data -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_unpivot.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake messes with these tests pretty badly since the 3 | -- output of the macro considers the casing of the source 4 | -- table columns. Using some hacks here to get this to work, 5 | -- but we should consider lowercasing the unpivot macro output 6 | -- at some point in the future for consistency 7 | 8 | {% if target.name == 'snowflake' %} 9 | {% set exclude = ['CUSTOMER_ID', 'CREATED_AT'] %} 10 | {% else %} 11 | {% set exclude = ['customer_id', 'created_at'] %} 12 | {% endif %} 13 | 14 | 15 | select 16 | customer_id, 17 | created_at, 18 | case 19 | when '{{ target.name }}' = 'snowflake' then lower(prop) 20 | else prop 21 | end as prop, 22 | val 23 | 24 | from ( 25 | {{ dbt_utils.unpivot( 26 | relation=ref('data_unpivot'), 27 | cast_to=type_string(), 28 | exclude=exclude, 29 | remove=['name'], 30 | field_name='prop', 31 | value_name='val' 32 | ) }} 33 | ) as sbq 34 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | resolves # 2 | 3 | ### Problem 4 | 5 | 9 | 10 | ### Solution 11 | 12 | 17 | 18 | ## Checklist 19 | - [ ] This code is associated with an [issue](https://github.com/dbt-labs/dbt-utils/issues) which has been triaged and [accepted for development](https://docs.getdbt.com/docs/contributing/oss-expectations#pull-requests). 
20 | - [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-utils/blob/main/CONTRIBUTING.md) and understand what's expected of me 21 | - [ ] I have run this code in development and it appears to resolve the stated issue 22 | - [ ] This PR includes tests, or tests are not required/relevant for this PR 23 | - [ ] I have updated the README.md (if applicable) 24 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/workflows/triage-labels.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # When we triage issues, we sometimes need more information from the issue creator. In 3 | # those cases we remove the `triage` label and add the `awaiting_response` label. Once we 4 | # receive a response in the form of a comment, we want the `awaiting_response` label removed 5 | # in favor of the `triage` label so we are aware that the issue needs action. 6 | 7 | # **why?** 8 | # To help with our team's triage issue tracking 9 | 10 | # **when?** 11 | # This will run when a comment is added to an issue and that issue has the `awaiting_response` label. 12 | 13 | name: Update Triage Label 14 | 15 | on: issue_comment 16 | 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | permissions: 22 | issues: write 23 | 24 | jobs: 25 | triage_label: 26 | if: contains(github.event.issue.labels.*.name, 'awaiting_response') 27 | uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main 28 | with: 29 | add_label: "triage" 30 | remove_label: "awaiting_response" 31 | secrets: inherit 32 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_filtered_columns_in_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro get_filtered_columns_in_relation(from, except=[]) -%} 2 | {{ return(adapter.dispatch('get_filtered_columns_in_relation', 'dbt_utils')(from, except)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_filtered_columns_in_relation(from, except=[]) -%} 6 | {%- do dbt_utils._is_relation(from, 'get_filtered_columns_in_relation') -%} 7 | {%- do dbt_utils._is_ephemeral(from, 'get_filtered_columns_in_relation') -%} 8 | 9 | {# -- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 10 | {%- if not execute -%} 11 | {{ return('') }} 12 | {% endif %} 13 | 14 | {%- set include_cols = [] %} 15 | {%- set cols = adapter.get_columns_in_relation(from) -%} 16 | {%- set except = except | map("lower") | list %} 17 | {%- for col in cols -%} 18 | {%- if col.column|lower not in except -%} 19 | {% do include_cols.append(col.column) %} 20 | {%- endif %} 21 | {%- endfor %} 22 | 23 | {{ return(include_cols) }} 24 | 25 | {%- endmacro %} -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_single_value.sql: -------------------------------------------------------------------------------- 1 | {% macro get_single_value(query, default=none) %} 2 | {{ return(adapter.dispatch('get_single_value', 'dbt_utils')(query, default)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_single_value(query, default) %} 6 | 7 | {# This macro returns the (0, 0) record in a query, i.e. 
the first row of the first column #} 8 | 9 | {%- call statement('get_query_result', fetch_result=True, auto_begin=false) -%} 10 | 11 | {{ query }} 12 | 13 | {%- endcall -%} 14 | 15 | {%- if execute -%} 16 | 17 | {% set r = load_result('get_query_result').table.columns[0].values() %} 18 | {% if r | length == 0 %} 19 | {% do print('Query `' ~ query ~ '` returned no rows. Using the default value: ' ~ default) %} 20 | {% set sql_result = default %} 21 | {% else %} 22 | {% set sql_result = r[0] %} 23 | {% endif %} 24 | 25 | {%- else -%} 26 | 27 | {% set sql_result = default %} 28 | 29 | {%- endif -%} 30 | 31 | {% do return(sql_result) %} 32 | 33 | {% endmacro %} -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/geo/test_haversine_distance_mi.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | select * from {{ ref('data_haversine_mi') }} 3 | ), 4 | final as ( 5 | select 6 | output as expected, 7 | cast( 8 | {{ 9 | dbt_utils.haversine_distance( 10 | lat1='lat_1', 11 | lon1='lon_1', 12 | lat2='lat_2', 13 | lon2='lon_2', 14 | unit='mi' 15 | ) 16 | }} as {{ type_numeric() }} 17 | ) as actual 18 | from data 19 | 20 | union all 21 | 22 | select 23 | output as expected, 24 | cast( 25 | {{ 26 | dbt_utils.haversine_distance( 27 | lat1='lat_1', 28 | lon1='lon_1', 29 | lat2='lat_2', 30 | lon2='lon_2', 31 | ) 32 | }} as {{ type_numeric() }} 33 | ) as actual 34 | from data 35 | ) 36 | select 37 | expected, 38 | round(actual,0) as actual 39 | from final 40 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/cardinality_equality.sql: -------------------------------------------------------------------------------- 1 | {% test cardinality_equality(model, column_name, to, field) %} 2 | {{ return(adapter.dispatch('test_cardinality_equality', 'dbt_utils')(model, column_name, to, field)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_cardinality_equality(model, column_name, to, field) %} 6 | 7 | {# T-SQL does not let you use numbers as aliases for columns #} 8 | {# Thus, no "GROUP BY 1" #} 9 | 10 | with table_a as ( 11 | select 12 | {{ column_name }}, 13 | count(*) as num_rows 14 | from {{ model }} 15 | group by {{ column_name }} 16 | ), 17 | 18 | table_b as ( 19 | select 20 | {{ field }}, 21 | count(*) as num_rows 22 | from {{ to }} 23 | group by {{ field }} 24 | ), 25 | 26 | except_a as ( 27 | select * 28 | from table_a 29 | {{ dbt.except() }} 30 | select * 31 | from table_b 32 | ), 33 | 34 | except_b as ( 35 | select * 36 | from table_b 37 | {{ dbt.except() }} 38 | select * 39 | from table_a 40 | ), 41 | 42 | unioned as ( 43 | select * 44 | from except_a 45 | union all 46 | select * 47 | from except_b 48 | ) 49 | 50 | select * 51 | from unioned 52 | 53 | {% endmacro %} 54 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/tests/sql/test_get_column_values_use_default.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | This keeps succeeding locally and failing in CI. Disabling it to get everything else out, but it should still be tested. 
4 | https://github.com/dbt-labs/dbt-utils/issues/788 5 | #} 6 | 7 | {{ config(enabled = false)}} 8 | 9 | {% set column_values = dbt_utils.get_column_values(ref('data_get_column_values_dropped'), 'field', default=['y', 'z'], order_by="field") %} 10 | 11 | with expected as ( 12 | select {{ safe_cast("'y'", type_string()) }} as expected_column_value union all 13 | select {{ safe_cast("'z'", type_string()) }} as expected_column_value 14 | ), 15 | 16 | actual as ( 17 | 18 | {% for val in column_values %} 19 | select {{ safe_cast("'" ~ val ~ "'", type_string()) }} as actual_column_value 20 | {% if not loop.last %} 21 | union all 22 | {% endif %} 23 | {% endfor %} 24 | ), 25 | 26 | failures as ( 27 | select * from actual 28 | where actual.actual_column_value not in ( 29 | select expected.expected_column_value from expected 30 | ) 31 | ) 32 | 33 | select * from failures 34 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_single_value_default.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Dear future reader, 3 | Before you go restructuring the delicate web of casts and quotes below, a warning: 4 | I once thought as you are thinking. Proceed with caution. 5 | #} 6 | 7 | {% set false_statement = 'select 1 as id ' ~ limit_zero() %} 8 | 9 | with default_data as ( 10 | 11 | select 12 | cast({{ dbt.string_literal('2022-01-01') }} as {{ dbt.type_timestamp() }}) as date_expected, 13 | cast({{ dbt.string_literal(dbt_utils.get_single_value(false_statement, '2022-01-01')) }} as {{ dbt.type_timestamp() }}) as date_actual, 14 | 15 | 1.23456 as float_expected, 16 | {{ dbt_utils.get_single_value(false_statement, 1.23456) }} as float_actual, 17 | 18 | 123456 as int_expected, 19 | {{ dbt_utils.get_single_value(false_statement, 123456) }} as int_actual, 20 | 21 | cast({{ dbt.string_literal('fallback') }} as {{ dbt.type_string() }}) as string_expected, 22 | cast({{ dbt.string_literal(dbt_utils.get_single_value(false_statement, 'fallback')) }} as {{ dbt.type_string() }}) as string_actual 23 | 24 | from {{ ref('data_get_single_value') }} 25 | ) 26 | 27 | select * 28 | from default_data -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/width_bucket.sql: -------------------------------------------------------------------------------- 1 | {% macro width_bucket(expr, min_value, max_value, num_buckets) %} 2 | {{ return(adapter.dispatch('width_bucket', 'dbt_utils') (expr, min_value, max_value, num_buckets)) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__width_bucket(expr, min_value, max_value, num_buckets) -%} 7 | 8 | {% set bin_size -%} 9 | (( {{ max_value }} - {{ min_value }} ) / {{ num_buckets }} ) 10 | {%- endset %} 11 | ( 12 | -- to break ties when the amount is exactly at the bucket edge 13 | case 14 | when 15 | mod( 16 | {{ dbt.safe_cast(expr, dbt.type_numeric() ) }}, 17 | {{ dbt.safe_cast(bin_size, dbt.type_numeric() ) }} 18 | ) = 0 19 | then 1 20 | else 0 21 | end 22 | ) + 23 | -- Anything over max_value goes in the N+1 bucket 24 | least( 25 | ceil( 26 | ({{ expr }} - {{ min_value }})/{{ bin_size }} 27 | ), 28 | {{ num_buckets }} + 1 29 | ) 30 | {%- endmacro %} 31 | 32 | {% macro snowflake__width_bucket(expr, min_value, max_value, num_buckets) %} 33 | width_bucket({{ expr }}, {{ min_value }}, {{ max_value }}, {{ num_buckets }} ) 34 | {% endmacro %} 35 | 
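{# Worked example (illustration only, not part of the macro): with
   min_value=0, max_value=100, num_buckets=4, bin_size is 25.
   For expr=60: mod(60, 25) != 0 contributes 0, and
   least(ceil((60 - 0) / 25), 5) = 3, so 60 lands in bucket 3.
   For expr=50, exactly on a bucket edge: the mod() tie-breaker contributes 1
   and least(ceil(50 / 25), 5) = 2, so 50 also lands in bucket 3, matching
   e.g. Snowflake's native width_bucket(). Values well above max_value are
   capped into the overflow bucket (num_buckets + 1) by the least() call.
   A hypothetical call in a model:
     select {{ dbt_utils.width_bucket('amount', 0, 100, 4) }} as amount_bucket #}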
-------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_relations_by_prefix.sql: -------------------------------------------------------------------------------- 1 | {% macro get_relations_by_prefix(schema, prefix, exclude='', database=target.database) %} 2 | {{ return(adapter.dispatch('get_relations_by_prefix', 'dbt_utils')(schema, prefix, exclude, database)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_relations_by_prefix(schema, prefix, exclude='', database=target.database) %} 6 | 7 | {%- call statement('get_tables', fetch_result=True) %} 8 | 9 | {{ dbt_utils.get_tables_by_prefix_sql(schema, prefix, exclude, database) }} 10 | 11 | {%- endcall -%} 12 | 13 | {%- set table_list = load_result('get_tables') -%} 14 | 15 | {%- if table_list and table_list['table'] -%} 16 | {%- set tbl_relations = [] -%} 17 | {%- for row in table_list['table'] -%} 18 | {%- set tbl_relation = api.Relation.create( 19 | database=database, 20 | schema=row.table_schema, 21 | identifier=row.table_name, 22 | type=row.table_type 23 | ) -%} 24 | {%- do tbl_relations.append(tbl_relation) -%} 25 | {%- endfor -%} 26 | 27 | {{ return(tbl_relations) }} 28 | {%- else -%} 29 | {{ return([]) }} 30 | {%- endif -%} 31 | 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/unique_combination_of_columns.sql: -------------------------------------------------------------------------------- 1 | {% test unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %} 2 | {{ return(adapter.dispatch('test_unique_combination_of_columns', 'dbt_utils')(model, combination_of_columns, quote_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %} 6 | 7 | {% if not quote_columns %} 8 | {%- set column_list=combination_of_columns %} 9 | {% elif quote_columns %} 10 | {%- set column_list=[] %} 11 | {% for column in combination_of_columns -%} 12 | {% do column_list.append( adapter.quote(column) ) %} 13 | {%- endfor %} 14 | {% else %} 15 | {{ exceptions.raise_compiler_error( 16 | "`quote_columns` argument for unique_combination_of_columns test must be one of [True, False] Got: '" ~ quote ~"'.'" 17 | ) }} 18 | {% endif %} 19 | 20 | {%- set columns_csv=column_list | join(', ') %} 21 | 22 | 23 | with validation_errors as ( 24 | 25 | select 26 | {{ columns_csv }} 27 | from {{ model }} 28 | group by {{ columns_csv }} 29 | having count(*) > 1 30 | 31 | ) 32 | 33 | select * 34 | from validation_errors 35 | 36 | 37 | {% endmacro %} 38 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/relationships_where.sql: -------------------------------------------------------------------------------- 1 | {% test relationships_where(model, column_name, to, field, from_condition="1=1", to_condition="1=1") %} 2 | {{ return(adapter.dispatch('test_relationships_where', 'dbt_utils')(model, column_name, to, field, from_condition, to_condition)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_relationships_where(model, column_name, to, field, from_condition="1=1", to_condition="1=1") %} 6 | 7 | {# T-SQL has no boolean data type so we use 1=1 which returns TRUE #} 8 | {# ref https://stackoverflow.com/a/7170753/3842610 #} 9 | 10 | with left_table as ( 11 | 12 | select 13 | {{column_name}} as id 14 | 15 | from 
{{model}} 16 | 17 | where {{column_name}} is not null 18 | and {{from_condition}} 19 | 20 | ), 21 | 22 | right_table as ( 23 | 24 | select 25 | {{field}} as id 26 | 27 | from {{to}} 28 | 29 | where {{field}} is not null 30 | and {{to_condition}} 31 | 32 | ), 33 | 34 | exceptions as ( 35 | 36 | select 37 | left_table.id, 38 | right_table.id as right_id 39 | 40 | from left_table 41 | 42 | left join right_table 43 | on left_table.id = right_table.id 44 | 45 | where right_table.id is null 46 | 47 | ) 48 | 49 | select * from exceptions 50 | 51 | {% endmacro %} 52 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T210000+0000/task_id=dbt_build/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-15T23:18:07.089+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-15T23:18:07.162+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-15T23:18:07.200+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-15T23:18:07.201+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-15T23:18:07.255+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 21:00:00+00:00 6 | [2025-09-15T23:18:07.272+0200] {standard_task_runner.py:64} INFO - Started process 263 to run task 7 | [2025-09-15T23:18:07.283+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'dbt_build', 'scheduled__2025-09-15T21:00:00+00:00', '--job-id', '6', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmp7xz9ujen'] 8 | [2025-09-15T23:18:07.293+0200] {standard_task_runner.py:91} INFO - Job 6: Subtask dbt_build 9 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/accepted_range.sql: -------------------------------------------------------------------------------- 1 | {% test accepted_range(model, column_name, min_value=none, max_value=none, inclusive=true) %} 2 | {{ return(adapter.dispatch('test_accepted_range', 'dbt_utils')(model, column_name, min_value, max_value, inclusive)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_accepted_range(model, column_name, min_value=none, max_value=none, inclusive=true) %} 6 | 7 | with meet_condition as( 8 | select * 9 | from {{ model }} 10 | ), 11 | 12 | validation_errors as ( 13 | select * 14 | from meet_condition 15 | where 16 | -- never true, defaults to an empty result set. Exists to ensure any combo of the `or` clauses below succeeds 17 | 1 = 2 18 | 19 | {%- if min_value is not none %} 20 | -- records with a value >= min_value are permitted. The `not` flips this to find records that don't meet the rule. 21 | or not {{ column_name }} > {{- "=" if inclusive }} {{ min_value }} 22 | {%- endif %} 23 | 24 | {%- if max_value is not none %} 25 | -- records with a value <= max_value are permitted. The `not` flips this to find records that don't meet the rule. 
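-- As a sketch: with min_value=0, max_value=100, inclusive=true, the two
-- `or not` clauses compile to `or not column_name >= 0` and
-- `or not column_name <= 100`, so only out-of-range rows survive into
-- validation_errors.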
26 | or not {{ column_name }} < {{- "=" if inclusive }} {{ max_value }} 27 | {%- endif %} 28 | ) 29 | 30 | select * 31 | from validation_errors 32 | 33 | {% endmacro %} 34 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/not_null_proportion.sql: -------------------------------------------------------------------------------- 1 | {% macro test_not_null_proportion(model, group_by_columns = []) %} 2 | {{ return(adapter.dispatch('test_not_null_proportion', 'dbt_utils')(model, group_by_columns, **kwargs)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__test_not_null_proportion(model, group_by_columns) %} 6 | 7 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 8 | {% set at_least = kwargs.get('at_least', kwargs.get('arg')) %} 9 | {% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %} 10 | 11 | {% if group_by_columns|length() > 0 %} 12 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 13 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 14 | {% endif %} 15 | 16 | with validation as ( 17 | select 18 | {{select_gb_cols}} 19 | sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as {{ dbt.type_numeric() }}) as not_null_proportion 20 | from {{ model }} 21 | {{groupby_gb_cols}} 22 | ), 23 | validation_errors as ( 24 | select 25 | {{select_gb_cols}} 26 | not_null_proportion 27 | from validation 28 | where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }} 29 | ) 30 | select 31 | * 32 | from validation_errors 33 | 34 | {% endmacro %} 35 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_table_types_sql.sql: -------------------------------------------------------------------------------- 1 | {%- macro get_table_types_sql() -%} 2 | {{ return(adapter.dispatch('get_table_types_sql', 'dbt_utils')()) }} 3 | {%- endmacro -%} 4 | 5 | {% macro default__get_table_types_sql() %} 6 | case table_type 7 | when 'BASE TABLE' then 'table' 8 | when 'EXTERNAL TABLE' then 'external' 9 | when 'MATERIALIZED VIEW' then 'materializedview' 10 | else lower(table_type) 11 | end as {{ adapter.quote('table_type') }} 12 | {% endmacro %} 13 | 14 | 15 | {% macro postgres__get_table_types_sql() %} 16 | case table_type 17 | when 'BASE TABLE' then 'table' 18 | when 'FOREIGN' then 'external' 19 | when 'MATERIALIZED VIEW' then 'materializedview' 20 | else lower(table_type) 21 | end as {{ adapter.quote('table_type') }} 22 | {% endmacro %} 23 | 24 | 25 | {% macro databricks__get_table_types_sql() %} 26 | case table_type 27 | when 'MANAGED' then 'table' 28 | when 'BASE TABLE' then 'table' 29 | when 'MATERIALIZED VIEW' then 'materializedview' 30 | else lower(table_type) 31 | end as {{ adapter.quote('table_type') }} 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_relations_by_pattern.sql: -------------------------------------------------------------------------------- 1 | {% macro get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 2 | {{ return(adapter.dispatch('get_relations_by_pattern', 'dbt_utils')(schema_pattern, table_pattern, exclude, database)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 6 | 7 | {%- call statement('get_tables', 
fetch_result=True) %} 8 | 9 | {{ dbt_utils.get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude, database) }} 10 | 11 | {%- endcall -%} 12 | 13 | {%- set table_list = load_result('get_tables') -%} 14 | 15 | {%- if table_list and table_list['table'] -%} 16 | {%- set tbl_relations = [] -%} 17 | {%- for row in table_list['table'] -%} 18 | {%- set tbl_relation = api.Relation.create( 19 | database=database, 20 | schema=row.table_schema, 21 | identifier=row.table_name, 22 | type=row.table_type 23 | ) -%} 24 | {%- do tbl_relations.append(tbl_relation) -%} 25 | {%- endfor -%} 26 | 27 | {{ return(tbl_relations) }} 28 | {%- else -%} 29 | {{ return([]) }} 30 | {%- endif -%} 31 | 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/recency.sql: -------------------------------------------------------------------------------- 1 | {% test recency(model, field, datepart, interval, ignore_time_component=False, group_by_columns = []) %} 2 | {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, ignore_time_component, group_by_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_recency(model, field, datepart, interval, ignore_time_component, group_by_columns) %} 6 | 7 | {% set threshold = 'cast(' ~ dbt.dateadd(datepart, interval * -1, dbt.current_timestamp()) ~ ' as ' ~ ('date' if ignore_time_component else dbt.type_timestamp()) ~ ')' %} 8 | 9 | {% if group_by_columns|length() > 0 %} 10 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 11 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 12 | {% endif %} 13 | 14 | 15 | with recency as ( 16 | 17 | select 18 | 19 | {{ select_gb_cols }} 20 | {% if ignore_time_component %} 21 | cast(max({{ field }}) as date) as most_recent 22 | {%- else %} 23 | max({{ field }}) as most_recent 24 | {%- endif %} 25 | 26 | from {{ model }} 27 | 28 | {{ groupby_gb_cols }} 29 | 30 | ) 31 | 32 | select 33 | 34 | {{ select_gb_cols }} 35 | most_recent, 36 | {{ threshold }} as threshold 37 | 38 | from recency 39 | where most_recent < {{ threshold }} 40 | 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/ISSUE_TEMPLATE/dbt_minor_release.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: dbt Minor Release Follow-Up 3 | about: A checklist of tasks to complete after a minor release is made to dbt 4 | title: 'dbt Minor Release Follow up for dbt v0.x.0' 5 | labels: 6 | assignees: '' 7 | --- 8 | 9 | 13 | 14 | First, check if this is a breaking change 15 | - [ ] Increase the upper bound of the `require-dbt-version` config in the `dbt_project.yml` 16 | - [ ] Increase the upper bound of the dbt version in `run_test.sh` 17 | - [ ] Create a PR against the `main` branch to see if tests pass 18 | 19 | If test pass, this is _not_ a breaking change. You should: 20 | - [ ] Merge into `main` 21 | - [ ] Create a patch release 22 | 23 | If tests fail, this _is_ a breaking change. You'll need to create a minor release: 24 | - [ ] Change the PR base to be against the next `dev` branch. 
25 | - [ ] Increase the lower bound to the current dbt minor version in both the `dbt_project.yml` and `run_test.sh` files 26 | - [ ] Fix any errors 27 | - [ ] Merge `dev` into `main` 28 | - [ ] Create a minor release 29 | - [ ] Once the release is available on hub, [create a new issue](https://github.com/dbt-labs/dbt-utils/issues/new/choose) using the "dbt-utils Minor Release Checklist" template 30 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/generate_series.sql: -------------------------------------------------------------------------------- 1 | {% macro get_powers_of_two(upper_bound) %} 2 | {{ return(adapter.dispatch('get_powers_of_two', 'dbt_utils')(upper_bound)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_powers_of_two(upper_bound) %} 6 | 7 | {% if upper_bound <= 0 %} 8 | {{ exceptions.raise_compiler_error("upper bound must be positive") }} 9 | {% endif %} 10 | 11 | {% for _ in range(1, 100) %} 12 | {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %} 13 | {% endfor %} 14 | 15 | {% endmacro %} 16 | 17 | 18 | {% macro generate_series(upper_bound) %} 19 | {{ return(adapter.dispatch('generate_series', 'dbt_utils')(upper_bound)) }} 20 | {% endmacro %} 21 | 22 | {% macro default__generate_series(upper_bound) %} 23 | 24 | {% set n = dbt_utils.get_powers_of_two(upper_bound) %} 25 | 26 | with p as ( 27 | select 0 as generated_number union all select 1 28 | ), unioned as ( 29 | 30 | select 31 | 32 | {% for i in range(n) %} 33 | p{{i}}.generated_number * power(2, {{i}}) 34 | {% if not loop.last %} + {% endif %} 35 | {% endfor %} 36 | + 1 37 | as generated_number 38 | 39 | from 40 | 41 | {% for i in range(n) %} 42 | p as p{{i}} 43 | {% if not loop.last %} cross join {% endif %} 44 | {% endfor %} 45 | 46 | ) 47 | 48 | select * 49 | from unioned 50 | where generated_number <= {{upper_bound}} 51 | order by generated_number 52 | 53 | {% endmacro %} 54 | -------------------------------------------------------------------------------- /dbt/profiles/profiles.yml: -------------------------------------------------------------------------------- 1 | # profiles.yml defines how dbt connects to your database (target, credentials, and default schema). 2 | # Leaving `schema` blank creates top-level `staging` and `analytics` schemas; setting it to `public` prefixes them (`public_staging`, `public_analytics`). 3 | 4 | api_bi_profile: 5 | target: dev # Default profile target 6 | outputs: # Connection settings per target 7 | dev: # Development environment 8 | type: postgres # Adapter type (PostgreSQL) 9 | host: postgres # Database host 10 | user: "{{ env_var('POSTGRES_USER') }}" # Username from env var 11 | password: "{{ env_var('POSTGRES_PASSWORD') }}" # Password from env var 12 | port: 5432 # Database port 13 | dbname: "{{ env_var('POSTGRES_DB') }}" # Database name 14 | schema: "public" # Base schema name; dbt appends folder-specific suffixes (staging → 15 | # `public_staging`, marts → `public_analytics`). 16 | # 17 | # Why set a base schema? 18 | # - Keeps all dbt-created objects grouped under a consistent prefix. 19 | # - Avoids cluttering the top-level namespace with plain `staging` / 20 | # `analytics` schemas. 21 | # - Works with the initialization SQL, which now creates 22 | # `public_staging` and `public_analytics` ahead of dbt runs. 
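# A hypothetical hardening sketch (not currently used here): env_var() also
# accepts a fallback default as a second argument, e.g.
#   user: "{{ env_var('POSTGRES_USER', 'postgres') }}"
# which keeps local runs working when the variable is unset.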
23 | threads: 4 # Number of threads dbt uses 24 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/at_least_one.sql: -------------------------------------------------------------------------------- 1 | {% test at_least_one(model, column_name, group_by_columns = []) %} 2 | {{ return(adapter.dispatch('test_at_least_one', 'dbt_utils')(model, column_name, group_by_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_at_least_one(model, column_name, group_by_columns) %} 6 | 7 | {% set pruned_cols = [column_name] %} 8 | 9 | {% if group_by_columns|length() > 0 %} 10 | 11 | {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} 12 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 13 | {% set pruned_cols = group_by_columns %} 14 | 15 | {% if column_name not in pruned_cols %} 16 | {% do pruned_cols.append(column_name) %} 17 | {% endif %} 18 | 19 | {% endif %} 20 | 21 | {% set select_pruned_cols = pruned_cols|join(' ,') %} 22 | 23 | select * 24 | from ( 25 | with pruned_rows as ( 26 | select 27 | {{ select_pruned_cols }} 28 | from {{ model }} 29 | {% if group_by_columns|length() == 0 %} 30 | where {{ column_name }} is not null 31 | limit 1 32 | {% endif %} 33 | ) 34 | select 35 | {# In TSQL, subquery aggregate columns need aliases #} 36 | {# thus: a filler col name, 'filler_column' #} 37 | {{select_gb_cols}} 38 | count({{ column_name }}) as filler_column 39 | 40 | from pruned_rows 41 | 42 | {{groupby_gb_cols}} 43 | 44 | having count({{ column_name }}) = 0 45 | 46 | ) validation_errors 47 | 48 | {% endmacro %} 49 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/models/sql/test_get_single_value.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Dear future reader, 3 | Before you go restructuring the delicate web of casts and quotes below, a warning: 4 | I once thought as you are thinking. Proceed with caution. 
5 | #} 6 | 7 | {% set date_statement %} 8 | select date_value from {{ ref('data_get_single_value') }} 9 | {% endset %} 10 | 11 | {% set float_statement %} 12 | select float_value from {{ ref('data_get_single_value') }} 13 | {% endset %} 14 | 15 | {% set int_statement %} 16 | select int_value from {{ ref('data_get_single_value') }} 17 | {% endset %} 18 | 19 | {% set string_statement %} 20 | select string_value from {{ ref('data_get_single_value') }} 21 | {% endset %} 22 | 23 | with default_data as ( 24 | 25 | select 26 | cast(date_value as {{ dbt.type_timestamp() }}) as date_expected, 27 | cast({{ dbt.string_literal(dbt_utils.get_single_value(date_statement)) }} as {{ dbt.type_timestamp() }}) as date_actual, 28 | 29 | float_value as float_expected, 30 | {{ dbt_utils.get_single_value(float_statement) }} as float_actual, 31 | 32 | int_value as int_expected, 33 | {{ dbt_utils.get_single_value(int_statement) }} as int_actual, 34 | 35 | string_value as string_expected, 36 | cast({{ dbt.string_literal(dbt_utils.get_single_value(string_statement)) }} as {{ dbt.type_string() }}) as string_actual 37 | 38 | from {{ ref('data_get_single_value') }} 39 | ) 40 | 41 | select * 42 | from default_data -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/sequential_values.sql: -------------------------------------------------------------------------------- 1 | {% test sequential_values(model, column_name, interval=1, datepart=None, group_by_columns = []) %} 2 | 3 | {{ return(adapter.dispatch('test_sequential_values', 'dbt_utils')(model, column_name, interval, datepart, group_by_columns)) }} 4 | 5 | {% endtest %} 6 | 7 | {% macro default__test_sequential_values(model, column_name, interval=1, datepart=None, group_by_columns = []) %} 8 | 9 | {% set previous_column_name = "previous_" ~ dbt_utils.slugify(column_name) %} 10 | 11 | {% if group_by_columns|length() > 0 %} 12 | {% set select_gb_cols = group_by_columns|join(',') + ', ' %} 13 | {% set partition_gb_cols = 'partition by ' + group_by_columns|join(',') %} 14 | {% endif %} 15 | 16 | with windowed as ( 17 | 18 | select 19 | {{ select_gb_cols }} 20 | {{ column_name }}, 21 | lag({{ column_name }}) over ( 22 | {{partition_gb_cols}} 23 | order by {{ column_name }} 24 | ) as {{ previous_column_name }} 25 | from {{ model }} 26 | ), 27 | 28 | validation_errors as ( 29 | select 30 | * 31 | from windowed 32 | {% if datepart %} 33 | where not(cast({{ column_name }} as {{ dbt.type_timestamp() }})= cast({{ dbt.dateadd(datepart, interval, previous_column_name) }} as {{ dbt.type_timestamp() }})) 34 | {% else %} 35 | where not({{ column_name }} = {{ previous_column_name }} + {{ interval }}) 36 | {% endif %} 37 | ) 38 | 39 | select * 40 | from validation_errors 41 | 42 | {% endmacro %} 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # API → Postgres → dbt → Power BI 2 | 3 | This stack loads raw data from the mock API into Postgres, transforms it with dbt, and exposes cleaned analytics tables (materialized in the `public_analytics` schema) for the `bi_read` user. 4 | 5 | ![Pipeline diagram](https://github.com/user-attachments/assets/5e84b667-0f24-46bb-85e6-ee05d4039685) 6 | 7 | 8 | ## Services 9 | - **docker** – container runtime and packaging layer; provides consistent environments for Postgres, Airflow, dbt, and the mock API. 
10 | - **postgres** – database used by all components 11 | - **mock-api** – provides sample API data 12 | - **dbt** – dbt CLI container with the project mounted at `/usr/app` 13 | - **airflow** – orchestrates extraction and dbt transformations 14 | 15 | ## Usage 16 | 17 | Start the core services (dbt is invoked on-demand by the Airflow DAG): 18 | 19 | ```bash 20 | docker compose up -d --build 21 | ``` 22 | 23 | Airflow performs its own database initialization on startup, so no separate init container is required. 24 | 25 | The DAG will execute dbt via `docker compose run --rm dbt ...` after extracting raw data. 26 | You can still run dbt manually if needed: 27 | 28 | ```bash 29 | docker compose run --rm dbt run 30 | docker compose run --rm dbt test 31 | ``` 32 | 33 | Power BI usage for this project is intentionally lightweight—the goal is simply to prove the pipeline delivers 34 | analytics tables that Power BI can read. Connect with the `bi_read` credentials and verify that the 35 | `public_analytics` schema tables are visible. A fully designed report or synthetic visuals are not required for 36 | sign-off. 37 | 38 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug or an issue you've found with this package 4 | title: '' 5 | labels: bug, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the bug 11 | 14 | 15 | ### Steps to reproduce 16 | 19 | 20 | ### Expected results 21 | 24 | 25 | ### Actual results 26 | 29 | 30 | ### Screenshots and log output 31 | 34 | 35 | ### System information 36 | **The contents of your `packages.yml` file:** 37 | 38 | **Which database are you using dbt with?** 39 | - [ ] postgres 40 | - [ ] redshift 41 | - [ ] bigquery 42 | - [ ] snowflake 43 | - [ ] other (specify: ____________) 44 | 45 | 46 | **The output of `dbt --version`:** 47 | ``` 48 | 49 | ``` 50 | 51 | 52 | ### Additional context 53 | 56 | 57 | ### Are you interested in contributing the fix? 58 | 61 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/RELEASE.md: -------------------------------------------------------------------------------- 1 | # dbt-utils releases 2 | 3 | ## When do we release? 4 | There's a few scenarios that might prompt a release: 5 | 6 | | Scenario | Release type | 7 | |--------------------------------------------|--------------| 8 | | Breaking changes to existing macros | major | 9 | | New functionality | minor | 10 | | Fixes to existing macros | patch | 11 | 12 | ## Release process 13 | 14 | 1. Begin a new release by clicking [here](https://github.com/dbt-labs/dbt-utils/releases/new) 15 | 1. Click "Choose a tag", then paste your version number (with no "v" in the name), then click "Create new tag: x.y.z. on publish" 16 | - The “Release title” will be identical to the tag name 17 | 1. Click the "Generate release notes" button 18 | 1. Copy and paste the generated release notes into [`CHANGELOG.md`](https://github.com/dbt-labs/dbt-utils/blob/main/CHANGELOG.md?plain=1), reformat to match previous entries, commit, and merge into the `main` branch ([example](https://github.com/dbt-labs/dbt-utils/pull/901)) 19 | 1. 
Click the "Publish release" button 20 | - This will automatically create an "Assets" section containing: 21 | - Source code (zip) 22 | - Source code (tar.gz) 23 | 24 | ## Post-release 25 | 26 | 1. Delete the automatic Zapier post ([example of one intentionally not deleted](https://getdbt.slack.com/archives/CU4MRJ7QB/p1646272037304639)) and replace it with a custom post in the `#package-ecosystem` channel in “The Community Slack” using the content from the tagged release notes (but replace GitHub handles with Slack handles) ([example](https://getdbt.slack.com/archives/CU4MRJ7QB/p1649372590957309)) 27 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Run tests for dbt-utils against supported adapters 3 | 4 | # **why?** 5 | # To ensure that dbt-utils works as expected with all supported adapters 6 | 7 | # **when?** 8 | # On every PR, and every push to main and when manually triggered 9 | 10 | name: Package Integration Tests 11 | 12 | on: 13 | push: 14 | branches: 15 | - main 16 | pull_request_target: 17 | workflow_dispatch: 18 | 19 | 20 | jobs: 21 | run-tests: 22 | uses: dbt-labs/dbt-package-testing/.github/workflows/run_tox.yml@v1 23 | with: 24 | # no need to pass postgres vars in. We can just use the defaults in the local container 25 | # redshift 26 | REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} 27 | REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} 28 | REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }} 29 | REDSHIFT_SCHEMA: "dbt_utils_integration_tests_redshift_${{ github.run_number }}" 30 | REDSHIFT_PORT: 5439 31 | # bigquery 32 | BIGQUERY_PROJECT: ${{ vars.BIGQUERY_PROJECT }} 33 | BIGQUERY_SCHEMA: "dbt_utils_integration_tests_bigquery_${{ github.run_number }}" 34 | # snowflake 35 | SNOWFLAKE_USER: ${{ vars.SNOWFLAKE_USER }} 36 | SNOWFLAKE_ROLE: ${{ vars.SNOWFLAKE_ROLE }} 37 | SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }} 38 | SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }} 39 | SNOWFLAKE_SCHEMA: "dbt_utils_integration_tests_snowflake_${{ github.run_number }}" 40 | secrets: 41 | DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.REDSHIFT_PASS }} 42 | BIGQUERY_KEYFILE_JSON: ${{ secrets.BIGQUERY_KEYFILE_JSON }} 43 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 44 | DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.SNOWFLAKE_PASS }} 45 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/date_spine.sql: -------------------------------------------------------------------------------- 1 | {% macro get_intervals_between(start_date, end_date, datepart) -%} 2 | {{ return(adapter.dispatch('get_intervals_between', 'dbt_utils')(start_date, end_date, datepart)) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__get_intervals_between(start_date, end_date, datepart) -%} 6 | {%- call statement('get_intervals_between', fetch_result=True) %} 7 | 8 | select {{ dbt.datediff(start_date, end_date, datepart) }} 9 | 10 | {%- endcall -%} 11 | 12 | {%- set value_list = load_result('get_intervals_between') -%} 13 | 14 | {%- if value_list and value_list['data'] -%} 15 | {%- set values = value_list['data'] | map(attribute=0) | list %} 16 | {{ return(values[0]) }} 17 | {%- else -%} 18 | {{ return(1) }} 19 | {%- endif -%} 20 | 21 | {%- endmacro %} 22 | 23 | 24 | 25 | 26 | {% macro date_spine(datepart, start_date, end_date) %} 27 | {{ return(adapter.dispatch('date_spine', 
'dbt_utils')(datepart, start_date, end_date)) }} 28 | {%- endmacro %} 29 | 30 | {% macro default__date_spine(datepart, start_date, end_date) %} 31 | 32 | 33 | {# call as follows: 34 | 35 | date_spine( 36 | "day", 37 | "to_date('01/01/2016', 'mm/dd/yyyy')", 38 | "dbt.dateadd(week, 1, current_date)" 39 | ) #} 40 | 41 | 42 | with rawdata as ( 43 | 44 | {{dbt_utils.generate_series( 45 | dbt_utils.get_intervals_between(start_date, end_date, datepart) 46 | )}} 47 | 48 | ), 49 | 50 | all_periods as ( 51 | 52 | select ( 53 | {{ 54 | dbt.dateadd( 55 | datepart, 56 | "row_number() over (order by generated_number) - 1", 57 | start_date 58 | ) 59 | }} 60 | ) as date_{{datepart}} 61 | from rawdata 62 | 63 | ), 64 | 65 | filtered as ( 66 | 67 | select * 68 | from all_periods 69 | where date_{{datepart}} <= {{ end_date }} 70 | 71 | ) 72 | 73 | select * from filtered 74 | 75 | {% endmacro %} 76 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/star.sql: -------------------------------------------------------------------------------- 1 | {% macro star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%} 2 | {{ return(adapter.dispatch('star', 'dbt_utils')(from, relation_alias, except, prefix, suffix, quote_identifiers)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%} 6 | {%- do dbt_utils._is_relation(from, 'star') -%} 7 | {%- do dbt_utils._is_ephemeral(from, 'star') -%} 8 | 9 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 10 | {%- if not execute -%} 11 | {% do return('*') %} 12 | {%- endif -%} 13 | 14 | {% set cols = dbt_utils.get_filtered_columns_in_relation(from, except) %} 15 | 16 | {%- if cols|length <= 0 -%} 17 | {% if flags.WHICH == 'compile' %} 18 | {% set response %} 19 | * 20 | /* No columns were returned. Maybe the relation doesn't exist yet 21 | or all columns were excluded. This star is only output during 22 | dbt compile, and exists to keep SQLFluff happy. */ 23 | {% endset %} 24 | {% do return(response) %} 25 | {% else %} 26 | {% do return("/* no columns returned from star() macro */") %} 27 | {% endif %} 28 | {%- else -%} 29 | {%- for col in cols %} 30 | {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%} 31 | {%- if quote_identifiers -%} 32 | {{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' %} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} 33 | {%- else -%} 34 | {{ col|trim }} {%- if prefix!='' or suffix!='' %} as {{ (prefix ~ col ~ suffix)|trim }} {%- endif -%} 35 | {% endif %} 36 | {%- if not loop.last %},{{ '\n ' }}{%- endif -%} 37 | {%- endfor -%} 38 | {% endif %} 39 | {%- endmacro %} 40 | 41 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/haversine_distance.sql: -------------------------------------------------------------------------------- 1 | {# 2 | This calculates the distance between two sets of latitude and longitude. 3 | The formula is from the following blog post: 4 | http://daynebatten.com/2015/09/latitude-longitude-distance-sql/ 5 | 6 | The arguments should be float type. 
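   As a reference sketch, the great-circle (haversine) formula the SQL below
   encodes, with r = 3961 mi as an approximate Earth radius and coordinates
   converted to radians:

     d = 2 * r * asin( sqrt( sin((lat2 - lat1)/2)^2
           + cos(lat1) * cos(lat2) * sin((lon2 - lon1)/2)^2 ) )
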
7 | #} 8 | 9 | {% macro degrees_to_radians(degrees) -%} 10 | acos(-1) * {{degrees}} / 180 11 | {%- endmacro %} 12 | 13 | {% macro haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} 14 | {{ return(adapter.dispatch('haversine_distance', 'dbt_utils')(lat1,lon1,lat2,lon2,unit)) }} 15 | {% endmacro %} 16 | 17 | {% macro default__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} 18 | {%- if unit == 'mi' %} 19 | {% set conversion_rate = 1 %} 20 | {% elif unit == 'km' %} 21 | {% set conversion_rate = 1.60934 %} 22 | {% else %} 23 | {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }} 24 | {% endif %} 25 | 26 | 2 * 3961 * asin(sqrt(power((sin(radians(({{ lat2 }} - {{ lat1 }}) / 2))), 2) + 27 | cos(radians({{lat1}})) * cos(radians({{lat2}})) * 28 | power((sin(radians(({{ lon2 }} - {{ lon1 }}) / 2))), 2))) * {{ conversion_rate }} 29 | 30 | {%- endmacro %} 31 | 32 | 33 | 34 | {% macro bigquery__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} 35 | {% set radians_lat1 = dbt_utils.degrees_to_radians(lat1) %} 36 | {% set radians_lat2 = dbt_utils.degrees_to_radians(lat2) %} 37 | {% set radians_lon1 = dbt_utils.degrees_to_radians(lon1) %} 38 | {% set radians_lon2 = dbt_utils.degrees_to_radians(lon2) %} 39 | {%- if unit == 'mi' %} 40 | {% set conversion_rate = 1 %} 41 | {% elif unit == 'km' %} 42 | {% set conversion_rate = 1.60934 %} 43 | {% else %} 44 | {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }} 45 | {% endif %} 46 | 2 * 3961 * asin(sqrt(power(sin(({{ radians_lat2 }} - {{ radians_lat1 }}) / 2), 2) + 47 | cos({{ radians_lat1 }}) * cos({{ radians_lat2 }}) * 48 | power(sin(({{ radians_lon2 }} - {{ radians_lon1 }}) / 2), 2))) * {{ conversion_rate }} 49 | 50 | {%- endmacro %} 51 | 52 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/tests/generic/expect_table_columns_to_match_set.sql: -------------------------------------------------------------------------------- 1 | {# 2 | This macro is copied and slightly edited from the dbt_expectations package. 3 | At the time of this addition, dbt_expectations couldn't be added because 4 | integration_tests is installing dbt_utils from local without a hard-coded 5 | path. dbt is not able to resolve duplicate dependencies of dbt_utils 6 | due to this. 
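For illustration, a hypothetical schema.yml entry using this test could look as
    follows (the model and column names are invented; because the test lives in this
    project rather than in an installed package, it is referenced without a package
    prefix, and the default transform="upper" normalizes the casing of both the
    supplied list and the relation's columns before comparing):

        models:
          - name: data_people
            tests:
              - expect_table_columns_to_match_set:
                  column_list: ["id", "first_name", "last_name"]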
7 | #} 8 | 9 | {%- test expect_table_columns_to_match_set(model, column_list, transform="upper") -%} 10 | {%- if execute -%} 11 | {%- set column_list = column_list | map(transform) | list -%} 12 | 13 | {# Replaces dbt_expectations._get_column_list() #} 14 | {%- set relation_column_names = adapter.get_columns_in_relation(model) 15 | | map(attribute="name") 16 | | map(transform) 17 | | list 18 | -%} 19 | 20 | {# Replaces dbt_expectations._list_intersect() #} 21 | {%- set matching_columns = [] -%} 22 | {%- for itm in column_list -%} 23 | {%- if itm in relation_column_names -%} 24 | {%- do matching_columns.append(itm) -%} 25 | {%- endif -%} 26 | {%- endfor -%} 27 | 28 | with relation_columns as ( 29 | 30 | {% for col_name in relation_column_names %} 31 | select cast('{{ col_name }}' as {{ type_string() }}) as relation_column 32 | {% if not loop.last %}union all{% endif %} 33 | {% endfor %} 34 | ), 35 | input_columns as ( 36 | 37 | {% for col_name in column_list %} 38 | select cast('{{ col_name }}' as {{ type_string() }}) as input_column 39 | {% if not loop.last %}union all{% endif %} 40 | {% endfor %} 41 | ) 42 | select * 43 | from 44 | relation_columns r 45 | full outer join 46 | input_columns i on r.relation_column = i.input_column 47 | where 48 | -- catch any column in input list that is not in the list of table columns 49 | -- or any table column that is not in the input list 50 | r.relation_column is null or 51 | i.input_column is null 52 | 53 | {%- endif -%} 54 | {%- endtest -%} 55 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/.github/ISSUE_TEMPLATE/utils_minor_release.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: dbt-utils Minor Release Follow-up 3 | about: A checklist of tasks to complete after making a dbt-utils minor release 4 | title: 'dbt Minor Release Follow up for dbt-utils v0.x.0' 5 | labels: 6 | assignees: '' 7 | --- 8 | 9 | 13 | 14 | ## Process for each dependent package 15 | First, check if this is a breaking change 16 | - [ ] Increase the upper bound of the `dbt-utils` `version:` config in the `packages.yml` of the dependent package. 17 | - [ ] Push to a new branch to see if tests pass, or test locally. 
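For reference, the upper-bound bump in a dependent package's `packages.yml` might look like this (the version bounds shown are illustrative):

    packages:
      - package: dbt-labs/dbt_utils
        version: [">=0.9.0", "<1.1.0"]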
18 | 19 | If this is _not_ a breaking change: 20 | - [ ] Create a patch release 21 | 22 | If this _is_ a breaking change: 23 | - [ ] Fix any breaking changes 24 | - [ ] Increase the lower bound to the current dbt-utils minor version 25 | - [ ] Create a minor release for the package 26 | 27 | ## Checklist of dependent packages 28 | | Package | PR | Release | 29 | |------------------------------------------------------------------------------|--------|-------------| 30 | | [audit-helper](https://github.com/dbt-labs/dbt-audit-helper) | [PR]() | [Release]() | 31 | | [codegen](https://github.com/dbt-labs/dbt-codegen) | [PR]() | [Release]() | 32 | | [redshift](https://github.com/dbt-labs/redshift) | [PR]() | [Release]() | 33 | | [event-logging](https://github.com/dbt-labs/dbt-event-logging) | [PR]() | [Release]() | 34 | | [snowplow](https://github.com/dbt-labs/snowplow) | [PR]() | [Release]() | 35 | | [external-tables](https://github.com/dbt-labs/dbt-external-tables) | [PR]() | [Release]() | 36 | | [segment](https://github.com/dbt-labs/segment) | [PR]() | [Release]() | 37 | | [facebook-ads](https://github.com/dbt-labs/facebook-ads) | [PR]() | [Release]() | 38 | | [stitch-utils](https://github.com/dbt-labs/stitch-utils) | [PR]() | [Release]() | 39 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'dbt_utils_integration_tests' 3 | version: '1.0' 4 | 5 | profile: 'integration_tests' 6 | 7 | # require-dbt-version: inherit this from dbt-utils 8 | 9 | config-version: 2 10 | 11 | model-paths: ["models"] 12 | analysis-paths: ["analysis"] 13 | test-paths: ["tests"] 14 | seed-paths: ["data"] 15 | macro-paths: ["macros"] 16 | 17 | target-path: "target" # directory which will store compiled SQL files 18 | clean-targets: # directories to be removed by `dbt clean` 19 | - "target" 20 | - "dbt_modules" 21 | - "dbt_packages" 22 | 23 | flags: 24 | send_anonymous_usage_stats: False 25 | use_colors: True 26 | 27 | dispatch: 28 | - macro_namespace: 'dbt_utils' 29 | search_order: ['dbt_utils_integration_tests', 'dbt_utils'] 30 | 31 | seeds: 32 | 33 | +quote_columns: false 34 | dbt_utils_integration_tests: 35 | 36 | sql: 37 | data_events_20180103: 38 | +schema: events 39 | 40 | data_get_column_values_dropped: 41 | # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet 42 | +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}" 43 | 44 | data_get_single_value: 45 | +column_types: 46 | date_value: timestamp 47 | float_value: float 48 | int_value: integer 49 | 50 | data_width_bucket: 51 | +column_types: 52 | num_buckets: integer 53 | min_value: float 54 | max_value: float 55 | 56 | data_unpivot_quote: 57 | +quote_columns: true 58 | 59 | data_unpivot_quote_expected: 60 | +quote_columns: true 61 | 62 | schema_tests: 63 | data_test_sequential_timestamps: 64 | +column_types: 65 | my_timestamp: timestamp 66 | 67 | data_test_equality_floats_a: 68 | +column_types: 69 | float_number: float 70 | 71 | data_test_equality_floats_columns_a: 72 | +column_types: 73 | float_number: float 74 | 75 | data_test_equality_floats_b: 76 | +column_types: 77 | float_number: float 78 | 79 | data_test_equality_floats_columns_b: 80 | +column_types: 81 | float_number: float 82 | -------------------------------------------------------------------------------- 
/dbt/dbt_packages/dbt_utils/macros/generic_tests/equal_rowcount.sql: -------------------------------------------------------------------------------- 1 | {% test equal_rowcount(model, compare_model, group_by_columns = []) %} 2 | {{ return(adapter.dispatch('test_equal_rowcount', 'dbt_utils')(model, compare_model, group_by_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_equal_rowcount(model, compare_model, group_by_columns) %} 6 | 7 | {#-- Needs to be set at parse time, before we return '' below --#} 8 | {{ config(fail_calc = 'sum(coalesce(diff_count, 0))') }} 9 | 10 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 11 | {%- if not execute -%} 12 | {{ return('') }} 13 | {% endif %} 14 | 15 | {% if group_by_columns|length() > 0 %} 16 | {% set select_gb_cols = group_by_columns|join(', ') + ', ' %} 17 | {% set join_gb_cols %} 18 | {% for c in group_by_columns %} 19 | and a.{{c}} = b.{{c}} 20 | {% endfor %} 21 | {% endset %} 22 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 23 | {% endif %} 24 | 25 | {#-- We must add a fake join key in case additional grouping variables are not provided --#} 26 | {#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1 --#} 27 | {#-- The same logic is used in fewer_rows_than. In case of changes, maintain consistent logic --#} 28 | {% set group_by_columns = ['id_dbtutils_test_equal_rowcount'] + group_by_columns %} 29 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 30 | 31 | with a as ( 32 | 33 | select 34 | {{select_gb_cols}} 35 | 1 as id_dbtutils_test_equal_rowcount, 36 | count(*) as count_a 37 | from {{ model }} 38 | {{groupby_gb_cols}} 39 | 40 | 41 | ), 42 | b as ( 43 | 44 | select 45 | {{select_gb_cols}} 46 | 1 as id_dbtutils_test_equal_rowcount, 47 | count(*) as count_b 48 | from {{ compare_model }} 49 | {{groupby_gb_cols}} 50 | 51 | ), 52 | final as ( 53 | 54 | select 55 | 56 | {% for c in group_by_columns -%} 57 | a.{{c}} as {{c}}_a, 58 | b.{{c}} as {{c}}_b, 59 | {% endfor %} 60 | 61 | count_a, 62 | count_b, 63 | abs(count_a - count_b) as diff_count 64 | 65 | from a 66 | full join b 67 | on 68 | a.id_dbtutils_test_equal_rowcount = b.id_dbtutils_test_equal_rowcount 69 | {{join_gb_cols}} 70 | 71 | 72 | ) 73 | 74 | select * from final 75 | 76 | {% endmacro %} 77 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = lint_all, testenv 4 | 5 | [testenv] 6 | passenv = 7 | # postgres env vars 8 | POSTGRES_HOST 9 | POSTGRES_USER 10 | DBT_ENV_SECRET_POSTGRES_PASS 11 | POSTGRES_PORT 12 | POSTGRES_DATABASE 13 | POSTGRES_SCHEMA 14 | # snowflake env vars 15 | SNOWFLAKE_ACCOUNT 16 | SNOWFLAKE_USER 17 | DBT_ENV_SECRET_SNOWFLAKE_PASS 18 | SNOWFLAKE_ROLE 19 | SNOWFLAKE_DATABASE 20 | SNOWFLAKE_WAREHOUSE 21 | SNOWFLAKE_SCHEMA 22 | # redshift 23 | REDSHIFT_HOST 24 | REDSHIFT_USER 25 | DBT_ENV_SECRET_REDSHIFT_PASS 26 | REDSHIFT_DATABASE 27 | REDSHIFT_SCHEMA 28 | REDSHIFT_PORT 29 | # bigquery 30 | BIGQUERY_PROJECT 31 | BIGQUERY_KEYFILE_JSON 32 | BIGQUERY_SCHEMA 33 | 34 | # Snowflake integration tests for centralized dbt testing 35 | # run dbt commands directly, assumes dbt is already installed in environment 36 | [testenv:dbt_integration_snowflake] 37 | changedir = integration_tests 38 | allowlist_externals = 39 | dbt 40 | skip_install 
= true 41 | commands = 42 | dbt --version 43 | dbt debug --target snowflake 44 | dbt deps --target snowflake 45 | dbt build --target snowflake --full-refresh 46 | 47 | 48 | # Postgres integration tests for centralized dbt testing 49 | # run dbt commands directly, assumes dbt is already installed in environment 50 | [testenv:dbt_integration_postgres] 51 | changedir = integration_tests 52 | allowlist_externals = 53 | dbt 54 | skip_install = true 55 | commands = 56 | dbt --version 57 | dbt debug --target postgres 58 | dbt deps --target postgres 59 | dbt build --target postgres --full-refresh 60 | 61 | # BigQuery integration tests for centralized dbt testing 62 | # run dbt commands directly, assumes dbt is already installed in environment 63 | [testenv:dbt_integration_bigquery] 64 | changedir = integration_tests 65 | allowlist_externals = 66 | dbt 67 | skip_install = true 68 | commands = 69 | dbt --version 70 | dbt debug --target bigquery 71 | dbt deps --target bigquery 72 | dbt build --target bigquery --full-refresh 73 | 74 | # redshift integration tests for centralized dbt testing 75 | # run dbt commands directly, assumes dbt is already installed in environment 76 | [testenv:dbt_integration_redshift] 77 | changedir = integration_tests 78 | allowlist_externals = 79 | dbt 80 | skip_install = true 81 | commands = 82 | dbt --version 83 | dbt debug --target redshift 84 | dbt deps --target redshift 85 | dbt build --target redshift --full-refresh 86 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/sql/get_column_values.sql: -------------------------------------------------------------------------------- 1 | {% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%} 2 | {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, order_by, max_records, default, where)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%} 6 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 7 | {%- if not execute -%} 8 | {% set default = [] if not default %} 9 | {{ return(default) }} 10 | {% endif %} 11 | 12 | {%- do dbt_utils._is_ephemeral(table, 'get_column_values') -%} 13 | 14 | {# Not all relations are tables. Renaming for internal clarity without breaking functionality for anyone using named arguments #} 15 | {# TODO: Change the method signature in a future 0.x.0 release #} 16 | {%- set target_relation = table -%} 17 | 18 | {# adapter.load_relation is a convenience wrapper to avoid building a Relation when we already have one #} 19 | {% set relation_exists = (load_relation(target_relation)) is not none %} 20 | 21 | {%- call statement('get_column_values', fetch_result=true) %} 22 | 23 | {%- if not relation_exists and default is none -%} 24 | 25 | {{ exceptions.raise_compiler_error("In get_column_values(): relation " ~ target_relation ~ " does not exist and no default value was provided.") }} 26 | 27 | {%- elif not relation_exists and default is not none -%} 28 | 29 | {{ log("Relation " ~ target_relation ~ " does not exist. 
Returning the default value: " ~ default) }} 30 | 31 | {{ return(default) }} 32 | 33 | {%- else -%} 34 | 35 | 36 | select 37 | {{ column }} as value 38 | 39 | from {{ target_relation }} 40 | 41 | {% if where is not none %} 42 | where {{ where }} 43 | {% endif %} 44 | 45 | group by {{ column }} 46 | order by {{ order_by }} 47 | 48 | {% if max_records is not none %} 49 | limit {{ max_records }} 50 | {% endif %} 51 | 52 | {% endif %} 53 | 54 | {%- endcall -%} 55 | 56 | {%- set value_list = load_result('get_column_values') -%} 57 | 58 | {%- if value_list and value_list['data'] -%} 59 | {%- set values = value_list['data'] | map(attribute=0) | list %} 60 | {{ return(values) }} 61 | {%- else -%} 62 | {{ return(default) }} 63 | {%- endif -%} 64 | 65 | {%- endmacro %} 66 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/docs/decisions/adr-0001-decision-record-format.md: -------------------------------------------------------------------------------- 1 | # FORMAT AND STRUCTURE OF DECISION RECORDS 2 | 3 | ## CONTEXT 4 | We previously decided to record any decisions made in this project using Nygard's architecture decision record (ADR) format. Should we continue with this format or adopt an alternative? 5 | 6 | There are multiple options for formatting: 7 | * [MADR 3.0.0-beta.2](https://github.com/adr/madr/blob/3.0.0-beta.2/template/adr-template.md) – Markdown Any Decision Records 8 | * [Michael Nygard's template](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions) – What we are using currently 9 | * [Sustainable Architectural Decisions](https://www.infoq.com/articles/sustainable-architectural-design-decisions) – The Y-Statements 10 | * Other templates listed at 11 | 12 | If we choose to adopt a new format, we'll also need to choose whether to re-format previous decisions. The two main options are: 13 | 1. Keep the original formatting 14 | 1. Re-format all previous records according to MADR 15 | 16 | Keeping the original formatting would have the benefit of not altering Nygard's original post, which was adopted as-is for its elegant self-describing nature. It would have the downside of inconsistent formatting, though. 17 | 18 | Re-formatting would resolve the inconsistency at the cost of altering Nygard's original work. 19 | 20 | ## DECISION 21 | Chosen option: "MADR 3.0.0-beta.2", because 22 | 23 | * MADR is a matured version of the original ADR proposal that represents the state of the art for ADRs. 24 | * MADR has ongoing development and is maintained like a software project. 25 | * MADR explicitly uses Markdown, which is easy to read and write. 26 | * MADR 3.0 (optionally) contains structured elements in a YAML block for machine-readability. 27 | 28 | * MADR allows for structured capturing of any decision. 29 | * The MADR project is active and continues to iterate with new versions. 30 | * The MADR project itself is maintained like software with specifications and new versions. 31 | 32 | Chosen option: "keep original formatting", because it feels special and deserves to be celebrated, even if there is slight inconsistency of formatting as a result. This decision is easily reversible in the future, if need be. 33 | 34 | ## STATUS 35 | Accepted. 36 | 37 | ## CONSEQUENCES 38 | New decisions will follow the MADR 3.0.0-beta.2 format, and we will update this decision and following decisions once MADR 3.0.0 is officially released. However, previous decisions may retain the original Nygard format. 
All decision records will be renamed according to MADR conventions, including moving from `doc/arch` to `docs/decisions`. 39 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/macros/generic_tests/fewer_rows_than.sql: -------------------------------------------------------------------------------- 1 | {% test fewer_rows_than(model, compare_model, group_by_columns = []) %} 2 | {{ return(adapter.dispatch('test_fewer_rows_than', 'dbt_utils')(model, compare_model, group_by_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_fewer_rows_than(model, compare_model, group_by_columns) %} 6 | 7 | {{ config(fail_calc = 'sum(coalesce(row_count_delta, 0))') }} 8 | 9 | {% if group_by_columns|length() > 0 %} 10 | {% set select_gb_cols = group_by_columns|join(', ') + ', ' %} 11 | {% set join_gb_cols %} 12 | {% for c in group_by_columns %} 13 | and a.{{c}} = b.{{c}} 14 | {% endfor %} 15 | {% endset %} 16 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 17 | {% endif %} 18 | 19 | {#-- We must add a fake join key in case additional grouping variables are not provided --#} 20 | {#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1 --#} 21 | {#-- The same logic is used in equal_rowcount. In case of changes, maintain consistent logic --#} 22 | {% set group_by_columns = ['id_dbtutils_test_fewer_rows_than'] + group_by_columns %} 23 | {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %} 24 | 25 | 26 | with a as ( 27 | 28 | select 29 | {{select_gb_cols}} 30 | 1 as id_dbtutils_test_fewer_rows_than, 31 | count(*) as count_our_model 32 | from {{ model }} 33 | {{ groupby_gb_cols }} 34 | 35 | ), 36 | b as ( 37 | 38 | select 39 | {{select_gb_cols}} 40 | 1 as id_dbtutils_test_fewer_rows_than, 41 | count(*) as count_comparison_model 42 | from {{ compare_model }} 43 | {{ groupby_gb_cols }} 44 | 45 | ), 46 | counts as ( 47 | 48 | select 49 | 50 | {% for c in group_by_columns -%} 51 | a.{{c}} as {{c}}_a, 52 | b.{{c}} as {{c}}_b, 53 | {% endfor %} 54 | 55 | count_our_model, 56 | count_comparison_model 57 | from a 58 | full join b on 59 | a.id_dbtutils_test_fewer_rows_than = b.id_dbtutils_test_fewer_rows_than 60 | {{ join_gb_cols }} 61 | 62 | ), 63 | final as ( 64 | 65 | select *, 66 | case 67 | -- fail the test if we have more rows than the reference model and return the row count delta 68 | when count_our_model > count_comparison_model then (count_our_model - count_comparison_model) 69 | -- fail the test if they are the same number 70 | when count_our_model = count_comparison_model then 1 71 | -- pass the test if our model has fewer rows than the comparison model (i.e. 
return the number 0) 72 | else 0 73 | end as row_count_delta 74 | from counts 75 | 76 | ) 77 | 78 | select * from final 79 | 80 | {% endmacro %} 81 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T183000+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-15T20:45:01.643+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-15T20:45:01.801+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-15T20:45:01.828+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-15T20:45:01.829+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-15T20:45:01.869+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 18:30:00+00:00 6 | [2025-09-15T20:45:01.905+0200] {standard_task_runner.py:64} INFO - Started process 901 to run task 7 | [2025-09-15T20:45:01.947+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-15T18:30:00+00:00', '--job-id', '25', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpt5i5tx7g'] 8 | [2025-09-15T20:45:01.969+0200] {standard_task_runner.py:91} INFO - Job 25: Subtask extract_sessions 9 | [2025-09-15T20:45:02.508+0200] {task_command.py:426} INFO - Running on host f51a750ef2c3 10 | [2025-09-15T20:45:04.180+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T18:30:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T18:30:00+00:00' 11 | [2025-09-15T20:45:04.183+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-15T20:45:05.388+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-15T20:45:05.391+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-15T20:45:05.465+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-15T20:45:05.467+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-15T20:45:05.553+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-15T18:30:00+00:00, execution_date=20250915T183000, start_date=20250915T184501, end_date=20250915T184505 17 | [2025-09-15T20:45:05.640+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-15T20:45:05.686+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-17T141500+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-17T16:30:02.908+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-17T16:30:03.186+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-17T16:30:03.250+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-17T16:30:03.289+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-17T16:30:03.837+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-17 14:15:00+00:00 6 | [2025-09-17T16:30:03.968+0200] {standard_task_runner.py:64} INFO - Started process 621 to run task 7 | [2025-09-17T16:30:03.992+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-17T14:15:00+00:00', '--job-id', '75', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmp2n_x_p1m'] 8 | [2025-09-17T16:30:04.072+0200] {standard_task_runner.py:91} INFO - Job 75: Subtask extract_sessions 9 | [2025-09-17T16:30:05.580+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-17T16:30:07.089+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-17T14:15:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-17T14:15:00+00:00' 11 | [2025-09-17T16:30:07.091+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-17T16:30:08.606+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-17T16:30:08.611+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-17T16:30:08.679+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-17T16:30:08.685+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-17T16:30:08.750+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-17T14:15:00+00:00, execution_date=20250917T141500, start_date=20250917T143003, end_date=20250917T143008 17 | [2025-09-17T16:30:08.838+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-17T16:30:08.897+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-17T154500+0000/task_id=extract_payments/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-17T18:00:01.144+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-17T18:00:01.214+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-17T18:00:01.227+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-17T18:00:01.228+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-17T18:00:01.248+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-17 15:45:00+00:00 6 | [2025-09-17T18:00:01.259+0200] {standard_task_runner.py:64} INFO - Started process 1683 to run task 7 | [2025-09-17T18:00:01.269+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_payments', 'scheduled__2025-09-17T15:45:00+00:00', '--job-id', '105', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpcyq08o57'] 8 | [2025-09-17T18:00:01.275+0200] {standard_task_runner.py:91} INFO - Job 105: Subtask extract_payments 9 | [2025-09-17T18:00:01.659+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-17T18:00:02.353+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_payments' AIRFLOW_CTX_EXECUTION_DATE='2025-09-17T15:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-17T15:45:00+00:00' 11 | [2025-09-17T18:00:02.355+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-17T18:00:02.745+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-17T18:00:02.749+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-17T18:00:02.819+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-17T18:00:02.821+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-17T18:00:02.893+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_payments, run_id=scheduled__2025-09-17T15:45:00+00:00, execution_date=20250917T154500, start_date=20250917T160001, end_date=20250917T160002 17 | [2025-09-17T18:00:02.939+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-17T18:00:02.963+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-17T161500+0000/task_id=extract_payments/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-17T18:30:01.391+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-17T18:30:01.467+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-17T18:30:01.492+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-17T18:30:01.494+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-17T18:30:01.562+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-17 16:15:00+00:00 6 | [2025-09-17T18:30:01.580+0200] {standard_task_runner.py:64} INFO - Started process 2028 to run task 7 | [2025-09-17T18:30:01.598+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_payments', 'scheduled__2025-09-17T16:15:00+00:00', '--job-id', '115', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmp0fw_005p'] 8 | [2025-09-17T18:30:01.604+0200] {standard_task_runner.py:91} INFO - Job 115: Subtask extract_payments 9 | [2025-09-17T18:30:01.824+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-17T18:30:02.660+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_payments' AIRFLOW_CTX_EXECUTION_DATE='2025-09-17T16:15:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-17T16:15:00+00:00' 11 | [2025-09-17T18:30:02.661+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-17T18:30:03.030+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-17T18:30:03.033+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-17T18:30:03.098+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-17T18:30:03.101+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-17T18:30:03.171+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_payments, run_id=scheduled__2025-09-17T16:15:00+00:00, execution_date=20250917T161500, start_date=20250917T163001, end_date=20250917T163003 17 | [2025-09-17T18:30:03.260+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-17T18:30:03.292+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-19T010000+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-19T03:15:00.823+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-19T03:15:00.876+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-19T03:15:00.887+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-19T03:15:00.888+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-19T03:15:00.904+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-19 01:00:00+00:00 6 | [2025-09-19T03:15:00.911+0200] {standard_task_runner.py:64} INFO - Started process 478 to run task 7 | [2025-09-19T03:15:00.918+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-19T01:00:00+00:00', '--job-id', '29', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmp613s7wqc'] 8 | [2025-09-19T03:15:00.923+0200] {standard_task_runner.py:91} INFO - Job 29: Subtask extract_sessions 9 | [2025-09-19T03:15:01.049+0200] {task_command.py:426} INFO - Running on host 286d1f3cd4b3 10 | [2025-09-19T03:15:01.529+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-19T01:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-19T01:00:00+00:00' 11 | [2025-09-19T03:15:01.532+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-19T03:15:02.032+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-19T03:15:02.033+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-19T03:15:02.069+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-19T03:15:02.070+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-19T03:15:02.118+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-19T01:00:00+00:00, execution_date=20250919T010000, start_date=20250919T011500, end_date=20250919T011502 17 | [2025-09-19T03:15:02.178+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-19T03:15:02.206+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T183000+0000/task_id=extract_customers/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-15T20:45:01.646+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-15T20:45:01.802+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-15T20:45:01.831+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-15T20:45:01.832+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-15T20:45:01.888+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 18:30:00+00:00 6 | [2025-09-15T20:45:01.929+0200] {standard_task_runner.py:64} INFO - Started process 902 to run task 7 | [2025-09-15T20:45:01.966+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_customers', 'scheduled__2025-09-15T18:30:00+00:00', '--job-id', '23', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpd6wuv03a'] 8 | [2025-09-15T20:45:01.993+0200] {standard_task_runner.py:91} INFO - Job 23: Subtask extract_customers 9 | [2025-09-15T20:45:02.493+0200] {task_command.py:426} INFO - Running on host f51a750ef2c3 10 | [2025-09-15T20:45:04.161+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_customers' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T18:30:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T18:30:00+00:00' 11 | [2025-09-15T20:45:04.162+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-15T20:45:04.313+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-15T20:45:04.315+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-15T20:45:04.369+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-15T20:45:04.370+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-15T20:45:04.437+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_customers, run_id=scheduled__2025-09-15T18:30:00+00:00, execution_date=20250915T183000, start_date=20250915T184501, end_date=20250915T184504 17 | [2025-09-15T20:45:04.511+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-15T20:45:04.563+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-16T143000+0000/task_id=extract_customers/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-16T17:00:13.287+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-16T17:00:13.528+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-16T17:00:13.558+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-16T17:00:13.571+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-16T17:00:13.637+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-16 14:30:00+00:00 6 | [2025-09-16T17:00:13.667+0200] {standard_task_runner.py:64} INFO - Started process 184 to run task 7 | [2025-09-16T17:00:13.683+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_customers', 'scheduled__2025-09-16T14:30:00+00:00', '--job-id', '18', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmp3344qx42'] 8 | [2025-09-16T17:00:13.702+0200] {standard_task_runner.py:91} INFO - Job 18: Subtask extract_customers 9 | [2025-09-16T17:00:13.073+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-16T17:00:15.689+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_customers' AIRFLOW_CTX_EXECUTION_DATE='2025-09-16T14:30:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-16T14:30:00+00:00' 11 | [2025-09-16T17:00:15.697+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-16T17:00:17.101+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-16T17:00:17.112+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-16T17:00:17.297+0200] {python.py:237} INFO - Done. Returned value was: 6 15 | [2025-09-16T17:00:17.342+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-16T17:00:17.545+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_customers, run_id=scheduled__2025-09-16T14:30:00+00:00, execution_date=20250916T143000, start_date=20250916T150013, end_date=20250916T150017 17 | [2025-09-16T17:00:18.052+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-16T17:00:18.110+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 19 | -------------------------------------------------------------------------------- /dbt/dbt_packages/dbt_utils/integration_tests/tests/assert_get_query_results_as_dict_objects_equal.sql: -------------------------------------------------------------------------------- 1 | -- depends_on: {{ ref('data_get_query_results_as_dict') }} 2 | 3 | {% set expected_dictionary={ 4 | 'col_1': [1, 2, 3], 5 | 'col_2': ['a', 'b', 'c'], 6 | 'col_3': [True, False, none] 7 | } %} 8 | 9 | {#- Handle snowflake casing silliness -#} 10 | {% if target.type == 'snowflake' %} 11 | {% set expected_dictionary={ 12 | 'COL_1': [1, 2, 3], 13 | 'COL_2': ['a', 'b', 'c'], 14 | 'COL_3': [True, False, none] 15 | } %} 16 | {% endif %} 17 | 18 | 19 | {% set actual_dictionary=dbt_utils.get_query_results_as_dict( 20 | "select * from " ~ ref('data_get_query_results_as_dict') ~ " order by 1" 21 | ) %} 22 | {#- 23 | For reasons that remain unclear, Jinja won't return True for actual_dictionary == expected_dictionary. 24 | Instead, we'll manually check that the values of these dictionaries are equivalent. 25 | -#} 26 | 27 | {% set ns = namespace( 28 | pass=True, 29 | err_msg = "" 30 | ) %} 31 | {% if execute %} 32 | {#- Check that the dictionaries have the same keys -#} 33 | {% set expected_keys=expected_dictionary.keys() | list | sort %} 34 | {% set actual_keys=actual_dictionary.keys() | list | sort %} 35 | 36 | {% if expected_keys != actual_keys %} 37 | {% set ns.pass=False %} 38 | {% set ns.err_msg %} 39 | The two dictionaries have different keys: 40 | expected_dictionary has keys: {{ expected_keys }} 41 | actual_dictionary has keys: {{ actual_keys }} 42 | {% endset %} 43 | 44 | {% else %} 45 | 46 | {% for key, value in expected_dictionary.items() %} 47 | {% set expected_length=expected_dictionary[key] | length %} 48 | {% set actual_length=actual_dictionary[key] | length %} 49 | 50 | {% if expected_length != actual_length %} 51 | {% set ns.pass=False %} 52 | {% set ns.err_msg %} 53 | The {{ key }} column has different lengths: 54 | expected_dictionary[{{ key }}] has length {{ expected_length }} 55 | actual_dictionary[{{ key }}] has length {{ actual_length }} 56 | {% endset %} 57 | 58 | {% else %} 59 | 60 | {% for i in range(value | length) %} 61 | {% set expected_value=expected_dictionary[key][i] %} 62 | {% set actual_value=actual_dictionary[key][i] %} 63 | {% if expected_value != actual_value %} 64 | {% set ns.pass=False %} 65 | {% set ns.err_msg %} 66 | The {{ key }} column has differing values: 67 | expected_dictionary[{{ key }}][{{ i }}] == {{ expected_value }} 68 | actual_dictionary[{{ key }}][{{ i }}] == {{ actual_value }} 69 | {% endset %} 70 | 71 | {% endif %} 72 | {% endfor %} 73 | {% endif %} 74 | 75 | {% endfor %} 76 | 77 | {% endif %} 78 | 79 | {{ log(ns.err_msg, info=True) }} 80 | select 1 as col_name {% if ns.pass %} {{ limit_zero() }} {% endif %} 81 | {% endif %} 82 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T174500+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | 
[2025-09-15T20:00:59.604+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-15T20:00:59.821+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-15T20:00:59.867+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-15T20:00:59.875+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-15T20:00:59.952+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 17:45:00+00:00 6 | [2025-09-15T20:00:59.979+0200] {standard_task_runner.py:64} INFO - Started process 378 to run task 7 | [2025-09-15T20:01:00.003+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-15T17:45:00+00:00', '--job-id', '9', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpyuielp_t'] 8 | [2025-09-15T20:01:00.012+0200] {standard_task_runner.py:91} INFO - Job 9: Subtask extract_sessions 9 | [2025-09-15T20:01:00.414+0200] {task_command.py:426} INFO - Running on host f51a750ef2c3 10 | [2025-09-15T20:01:01.644+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T17:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T17:45:00+00:00' 11 | [2025-09-15T20:01:01.646+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-15T20:01:03.362+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-15T20:01:03.363+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-15T20:01:03.401+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-15T20:01:03.404+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-15T20:01:03.465+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-15T17:45:00+00:00, execution_date=20250915T174500, start_date=20250915T180059, end_date=20250915T180103 17 | [2025-09-15T20:01:03.524+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-15T20:01:03.574+0200] {taskinstance.py:3503} INFO - 1 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-15T20:01:03.577+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T214500+0000/task_id=extract_payments/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-16T00:00:01.317+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-16T00:00:01.380+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-16T00:00:01.392+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-16T00:00:01.393+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-16T00:00:01.417+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 21:45:00+00:00 6 | [2025-09-16T00:00:01.427+0200] {standard_task_runner.py:64} INFO - Started process 269 to run task 7 | [2025-09-16T00:00:01.438+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_payments', 'scheduled__2025-09-15T21:45:00+00:00', '--job-id', '9', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmprxknwxpn'] 8 | [2025-09-16T00:00:01.444+0200] {standard_task_runner.py:91} INFO - Job 9: Subtask extract_payments 9 | [2025-09-16T00:00:01.632+0200] {task_command.py:426} INFO - Running on host b91cbf0a6013 10 | [2025-09-16T00:00:02.374+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_payments' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T21:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T21:45:00+00:00' 11 | [2025-09-16T00:00:02.376+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-16T00:00:02.664+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-16T00:00:02.665+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-16T00:00:02.722+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-16T00:00:02.723+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-16T00:00:02.777+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_payments, run_id=scheduled__2025-09-15T21:45:00+00:00, execution_date=20250915T214500, start_date=20250915T220001, end_date=20250915T220002 17 | [2025-09-16T00:00:02.842+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-16T00:00:02.904+0200] {taskinstance.py:3503} INFO - 0 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-16T00:00:02.909+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T214500+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-16T00:00:01.311+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-16T00:00:01.374+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-16T00:00:01.389+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-16T00:00:01.392+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-16T00:00:01.414+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 21:45:00+00:00 6 | [2025-09-16T00:00:01.425+0200] {standard_task_runner.py:64} INFO - Started process 268 to run task 7 | [2025-09-16T00:00:01.437+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-15T21:45:00+00:00', '--job-id', '7', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpeye7lpma'] 8 | [2025-09-16T00:00:01.443+0200] {standard_task_runner.py:91} INFO - Job 7: Subtask extract_sessions 9 | [2025-09-16T00:00:01.615+0200] {task_command.py:426} INFO - Running on host b91cbf0a6013 10 | [2025-09-16T00:00:02.383+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T21:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T21:45:00+00:00' 11 | [2025-09-16T00:00:02.385+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-16T00:00:02.930+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-16T00:00:02.932+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-16T00:00:02.994+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-16T00:00:02.998+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-16T00:00:03.093+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-15T21:45:00+00:00, execution_date=20250915T214500, start_date=20250915T220001, end_date=20250915T220003 17 | [2025-09-16T00:00:03.202+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-16T00:00:03.296+0200] {taskinstance.py:3503} INFO - 0 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-16T00:00:03.301+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T224500+0000/task_id=extract_payments/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-16T01:00:00.961+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-16T01:00:01.090+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-16T01:00:01.121+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-16T01:00:01.122+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-16T01:00:01.170+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 22:45:00+00:00 6 | [2025-09-16T01:00:01.184+0200] {standard_task_runner.py:64} INFO - Started process 275 to run task 7 | [2025-09-16T01:00:01.193+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_payments', 'scheduled__2025-09-15T22:45:00+00:00', '--job-id', '7', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmprs10ellx'] 8 | [2025-09-16T01:00:01.201+0200] {standard_task_runner.py:91} INFO - Job 7: Subtask extract_payments 9 | [2025-09-16T01:00:01.414+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-16T01:00:02.341+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_payments' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T22:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T22:45:00+00:00' 11 | [2025-09-16T01:00:02.345+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-16T01:00:02.683+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-16T01:00:02.684+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-16T01:00:02.725+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-16T01:00:02.727+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-16T01:00:02.779+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_payments, run_id=scheduled__2025-09-15T22:45:00+00:00, execution_date=20250915T224500, start_date=20250915T230001, end_date=20250915T230002 17 | [2025-09-16T01:00:02.824+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-16T01:00:02.875+0200] {taskinstance.py:3503} INFO - 0 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-16T01:00:02.882+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-15T224500+0000/task_id=extract_sessions/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-16T01:00:00.952+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-16T01:00:01.058+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-16T01:00:01.089+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-16T01:00:01.091+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-16T01:00:01.126+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-15 22:45:00+00:00 6 | [2025-09-16T01:00:01.146+0200] {standard_task_runner.py:64} INFO - Started process 273 to run task 7 | [2025-09-16T01:00:01.154+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_sessions', 'scheduled__2025-09-15T22:45:00+00:00', '--job-id', '8', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpiakeaf0d'] 8 | [2025-09-16T01:00:01.167+0200] {standard_task_runner.py:91} INFO - Job 8: Subtask extract_sessions 9 | [2025-09-16T01:00:01.368+0200] {task_command.py:426} INFO - Running on host 1621268d5064 10 | [2025-09-16T01:00:02.171+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_sessions' AIRFLOW_CTX_EXECUTION_DATE='2025-09-15T22:45:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-15T22:45:00+00:00' 11 | [2025-09-16T01:00:02.175+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-16T01:00:02.840+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-16T01:00:02.841+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-16T01:00:02.887+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-16T01:00:02.891+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-16T01:00:02.934+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. 
dag_id=etl_api_to_bi, task_id=extract_sessions, run_id=scheduled__2025-09-15T22:45:00+00:00, execution_date=20250915T224500, start_date=20250915T230001, end_date=20250915T230002 17 | [2025-09-16T01:00:02.981+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-16T01:00:03.020+0200] {taskinstance.py:3503} INFO - 1 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-16T01:00:03.023+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | -------------------------------------------------------------------------------- /airflow/logs/dag_id=etl_api_to_bi/run_id=scheduled__2025-09-17T170000+0000/task_id=extract_payments/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2025-09-17T19:15:01.451+0200] {local_task_job_runner.py:120} INFO - ::group::Pre task execution logs 2 | [2025-09-17T19:15:01.593+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 3 | [2025-09-17T19:15:01.625+0200] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti= 4 | [2025-09-17T19:15:01.630+0200] {taskinstance.py:2306} INFO - Starting attempt 1 of 3 5 | [2025-09-17T19:15:01.664+0200] {taskinstance.py:2330} INFO - Executing on 2025-09-17 17:00:00+00:00 6 | [2025-09-17T19:15:01.679+0200] {standard_task_runner.py:64} INFO - Started process 285 to run task 7 | [2025-09-17T19:15:01.686+0200] {standard_task_runner.py:90} INFO - Running: ['airflow', 'tasks', 'run', 'etl_api_to_bi', 'extract_payments', 'scheduled__2025-09-17T17:00:00+00:00', '--job-id', '8', '--raw', '--subdir', 'DAGS_FOLDER/etl_api_to_bi.py', '--cfg-path', '/tmp/tmpyb3j1uif'] 8 | [2025-09-17T19:15:01.692+0200] {standard_task_runner.py:91} INFO - Job 8: Subtask extract_payments 9 | [2025-09-17T19:15:01.917+0200] {task_command.py:426} INFO - Running on host cc1d85c9aa13 10 | [2025-09-17T19:15:02.917+0200] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='data' AIRFLOW_CTX_DAG_ID='etl_api_to_bi' AIRFLOW_CTX_TASK_ID='extract_payments' AIRFLOW_CTX_EXECUTION_DATE='2025-09-17T17:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2025-09-17T17:00:00+00:00' 11 | [2025-09-17T19:15:02.918+0200] {taskinstance.py:430} INFO - ::endgroup:: 12 | [2025-09-17T19:15:03.304+0200] {crypto.py:82} WARNING - empty cryptography key - values will not be stored encrypted. 13 | [2025-09-17T19:15:03.310+0200] {base.py:84} INFO - Using connection ID 'postgres_default' for task execution. 14 | [2025-09-17T19:15:03.407+0200] {python.py:237} INFO - Done. Returned value was: 1 15 | [2025-09-17T19:15:03.409+0200] {taskinstance.py:441} INFO - ::group::Post task execution logs 16 | [2025-09-17T19:15:03.498+0200] {taskinstance.py:1206} INFO - Marking task as SUCCESS. dag_id=etl_api_to_bi, task_id=extract_payments, run_id=scheduled__2025-09-17T17:00:00+00:00, execution_date=20250917T170000, start_date=20250917T171501, end_date=20250917T171503 17 | [2025-09-17T19:15:03.616+0200] {local_task_job_runner.py:243} INFO - Task exited with return code 0 18 | [2025-09-17T19:15:03.703+0200] {taskinstance.py:3503} INFO - 0 downstream tasks scheduled from follow-on schedule check 19 | [2025-09-17T19:15:03.712+0200] {local_task_job_runner.py:222} INFO - ::endgroup:: 20 | --------------------------------------------------------------------------------