├── .flake8 ├── .github └── workflows │ ├── bump-version.yml │ ├── remind-docs-and-tests.yml │ ├── run-precommit.yml │ ├── test-all-warehouses-dbt-pre-releases.yml │ ├── test-all-warehouses.yml │ └── test-warehouse.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── analyses └── .gitkeep ├── data └── .gitkeep ├── dbt_project.yml ├── dev-requirements.txt ├── integration_tests ├── README.md ├── dbt_project │ ├── .gitignore │ ├── data │ │ └── .gitkeep │ ├── dbt_project.yml │ ├── debug.sh │ ├── macros │ │ ├── clear_env.sql │ │ ├── create_all_types_table.sql │ │ ├── dbg.sql │ │ ├── generate_schema_name.sql │ │ ├── get_anomaly_config.sql │ │ ├── materializations.sql │ │ └── python.sql │ ├── models │ │ ├── customers.sql │ │ ├── exposures.yml │ │ ├── metrics │ │ │ ├── python │ │ │ │ └── metrics_python_table.py │ │ │ └── sql │ │ │ │ ├── metrics_incremental.sql │ │ │ │ ├── metrics_table.sql │ │ │ │ └── metrics_view.sql │ │ ├── one.sql │ │ ├── orders.sql │ │ ├── schema.yml │ │ ├── test_data.yaml │ │ └── tmp │ │ │ └── .gitkeep │ ├── packages.yml │ ├── seeds │ │ ├── stg_customers.csv │ │ └── stg_orders.csv │ └── selectors.yml ├── deprecated_tests │ ├── README.md │ ├── data │ │ ├── training │ │ │ ├── any_type_column_anomalies_training.csv │ │ │ ├── backfill_days_column_anomalies_training.csv │ │ │ ├── dimension_anomalies_training.csv │ │ │ ├── groups_training.csv │ │ │ ├── numeric_column_anomalies_training.csv │ │ │ ├── stats_players_training.csv │ │ │ ├── stats_team_training.csv │ │ │ └── string_column_anomalies_training.csv │ │ └── validation │ │ │ ├── any_type_column_anomalies_validation.csv │ │ │ ├── backfill_days_column_anomalies_validation.csv │ │ │ ├── dimension_anomalies_validation.csv │ │ │ ├── groups_validation.csv │ │ │ ├── numeric_column_anomalies_validation.csv │ │ │ ├── stats_players_validation.csv │ │ │ ├── stats_team_validation.csv │ │ │ └── string_column_anomalies_validation.csv │ ├── dbt_project.yml │ ├── debug.sh │ 
├── generate_data.py │ ├── macros │ │ ├── asserts.sql │ │ ├── e2e_tests │ │ │ ├── clear_tests.sql │ │ │ ├── test_config_levels.sql │ │ │ ├── tests_validation.sql │ │ │ ├── utils │ │ │ │ ├── list_assertions.sql │ │ │ │ └── table_assertions.sql │ │ │ ├── validate_backfill_days.sql │ │ │ ├── validate_column_anomalies.sql │ │ │ ├── validate_config_levels.sql │ │ │ ├── validate_dimensions_anomalies.sql │ │ │ ├── validate_directional_anomalies.sql │ │ │ ├── validate_freshness_anomalies.sql │ │ │ ├── validate_schema_changes.sql │ │ │ ├── validate_seasonal_volume_anomalies.sql │ │ │ └── validate_table_anomalies.sql │ │ ├── generic_tests │ │ │ ├── generic_test_on_column.sql │ │ │ └── generic_test_on_model.sql │ │ ├── system │ │ │ ├── dbg.sql │ │ │ ├── generate_schema_name.sql │ │ │ ├── materializations.sql │ │ │ ├── read_table.sql │ │ │ └── return_config_var.sql │ │ └── unit_tests │ │ │ └── test_adapter_specific_macros_have_default_implementation.sql │ ├── models │ │ ├── any_type_column_anomalies.sql │ │ ├── backfill_days_column_anomalies.sql │ │ ├── config_levels_project.sql │ │ ├── config_levels_test_and_model.sql │ │ ├── copy_numeric_column_anomalies.sql │ │ ├── dimension_anomalies.sql │ │ ├── ephemeral_model.sql │ │ ├── error_model.sql │ │ ├── groups.sql │ │ ├── nested │ │ │ └── models │ │ │ │ └── tree │ │ │ │ └── nested.sql │ │ ├── no_timestamp_anomalies.sql │ │ ├── non_dbt_model.sql │ │ ├── numeric_column_anomalies.sql │ │ ├── one.sql │ │ ├── schema.yml │ │ ├── stats_players.sql │ │ ├── stats_team.sql │ │ ├── string_column_anomalies.sql │ │ ├── test_alerts_union.sql │ │ ├── users_per_day_weekly_seasonal.sql │ │ └── users_per_hour_daily_seasonal.sql │ ├── packages.yml │ ├── run_e2e_tests.py │ ├── run_unit_tests.py │ ├── snapshots │ │ └── failed_snapshot.sql │ └── tests │ │ ├── singular_test_with_no_ref.sql │ │ ├── singular_test_with_one_ref.sql │ │ ├── singular_test_with_source_ref.sql │ │ └── singular_test_with_two_refs.sql ├── docker-compose-trino.yml ├── 
docker-compose.yml ├── docker │ └── trino │ │ ├── catalog │ │ ├── iceberg.properties │ │ └── memory.properties │ │ └── etc │ │ ├── config.properties │ │ ├── jvm.config │ │ └── node.properties ├── requirements.txt └── tests │ ├── conftest.py │ ├── data_generator.py │ ├── data_seeder.py │ ├── dbt_flags.py │ ├── dbt_project.py │ ├── env.py │ ├── logger.py │ ├── pytest.ini │ ├── test_all_columns_anomalies.py │ ├── test_anomalies_backfill_logic.py │ ├── test_anomalies_ranges.py │ ├── test_anomaly_exclude_metrics.py │ ├── test_anomaly_test_configuration.py │ ├── test_collect_metrics.py │ ├── test_column_anomalies.py │ ├── test_dbt_artifacts │ ├── test_artifacts.py │ ├── test_columns.py │ └── test_groups.py │ ├── test_dimension_anomalies.py │ ├── test_disable_elementary.py │ ├── test_event_freshness_anomalies.py │ ├── test_exposure_schema_validity.py │ ├── test_failed_row_count.py │ ├── test_freshness_anomalies.py │ ├── test_jsonschema.py │ ├── test_long_strings.py │ ├── test_python.py │ ├── test_sampling.py │ ├── test_schema_changes.py │ ├── test_string_monitors.py │ └── test_volume_anomalies.py ├── macros ├── .gitkeep ├── commands │ ├── create_elementary_user.sql │ ├── create_indexes.sql │ ├── delete_duplicate_rows.sql │ ├── dump_table.sql │ ├── enforce_project_configurations.sql │ ├── generate_elementary_cli_profile.sql │ ├── generate_json_schema_test.sql │ ├── generate_schema_baseline_test.sql │ └── permissions │ │ ├── get_required_permissions.sql │ │ ├── information_schema_permissions.sql │ │ ├── query_history_permissions.sql │ │ └── validate_permissions.sql ├── edr │ ├── alerts │ │ ├── anomaly_detection_description.sql │ │ └── dbt_run_results_description.sql │ ├── data_monitoring │ │ ├── anomaly_detection │ │ │ ├── get_anomaly_scores_query.sql │ │ │ ├── store_anomaly_test_results.sql │ │ │ └── store_metrics_in_cache.sql │ │ ├── data_monitors_configuration │ │ │ ├── get_buckets_configuration.sql │ │ │ ├── get_column_monitors.sql │ │ │ └── get_table_monitors.sql │ │ 
├── monitors │ │ │ ├── column_any_type_monitors.sql │ │ │ ├── column_boolean_monitors.sql │ │ │ ├── column_numeric_monitors.sql │ │ │ ├── column_string_monitors.sql │ │ │ ├── monitors.sql │ │ │ └── table_monitors.sql │ │ ├── monitors_query │ │ │ ├── column_monitoring_query.sql │ │ │ ├── dimension_monitoring_query.sql │ │ │ ├── get_latest_full_refresh.sql │ │ │ ├── get_start_bucket_in_data.sql │ │ │ └── table_monitoring_query.sql │ │ └── schema_changes │ │ │ ├── get_columns_changes_query.sql │ │ │ ├── get_columns_snapshot_query.sql │ │ │ ├── get_last_schema_changes_time.sql │ │ │ ├── store_schema_snapshot_tables_in_cache.sql │ │ │ └── store_schema_test_results.sql │ ├── dbt_artifacts │ │ ├── get_artifact_metadata_hash.sql │ │ ├── upload_artifacts_to_table.sql │ │ ├── upload_dbt_artifacts.sql │ │ ├── upload_dbt_columns.sql │ │ ├── upload_dbt_exposures.sql │ │ ├── upload_dbt_groups.sql │ │ ├── upload_dbt_invocation.sql │ │ ├── upload_dbt_metrics.sql │ │ ├── upload_dbt_models.sql │ │ ├── upload_dbt_seeds.sql │ │ ├── upload_dbt_snapshots.sql │ │ ├── upload_dbt_sources.sql │ │ ├── upload_dbt_tests.sql │ │ ├── upload_run_results.sql │ │ └── upload_source_freshness.sql │ ├── materializations │ │ └── test │ │ │ ├── failed_row_count.sql │ │ │ ├── test.sql │ │ │ └── test_result.sql │ ├── metadata_collection │ │ ├── get_columns_by_schemas.sql │ │ ├── get_columns_from_information_schema.sql │ │ ├── get_columns_in_project.sql │ │ ├── get_metric_properties.sql │ │ └── get_tables_from_information_schema.sql │ ├── system │ │ ├── configuration │ │ │ ├── get_configured_databases_from_graph.sql │ │ │ ├── get_configured_schemas_from_graph.sql │ │ │ └── is_elementary_enabled.sql │ │ ├── hooks │ │ │ ├── on_run_end.sql │ │ │ └── on_run_start.sql │ │ └── system_utils │ │ │ ├── buckets_cte.sql │ │ │ ├── clean_dbt_columns_temp_tables.sql │ │ │ ├── clean_elementary_temp_tables.sql │ │ │ ├── empty_table.sql │ │ │ ├── full_names.sql │ │ │ ├── get_config_var.sql │ │ │ ├── 
get_elementary_package_version.sql │ │ │ ├── get_first_env_var.sql │ │ │ ├── get_run_started_at.sql │ │ │ ├── get_runtime_config.sql │ │ │ ├── get_test_argument.sql │ │ │ ├── get_var.sql │ │ │ ├── logs.sql │ │ │ ├── no_results_query.sql │ │ │ └── times.sql │ └── tests │ │ ├── on_run_end │ │ ├── handle_tests_results.sql │ │ ├── union_columns_snapshot_query.sql │ │ └── union_metrics_query.sql │ │ ├── on_run_start │ │ ├── create_elementary_tests_schema.sql │ │ ├── ensure_materialize_override.sql │ │ ├── init_elementary_graph.sql │ │ └── recommend_dbt_core_artifacts_upgrade.sql │ │ ├── python.sql │ │ ├── test_ai_data_validation.sql │ │ ├── test_all_columns_anomalies.sql │ │ ├── test_collect_metrics.sql │ │ ├── test_column_anomalies.sql │ │ ├── test_configuration │ │ ├── get_anomalies_test_configuration.sql │ │ ├── get_anomaly_direction.sql │ │ ├── get_days_back.sql │ │ ├── get_detection_delay.sql │ │ ├── get_exclude_final_results.sql │ │ ├── get_model_baseline_columns.sql │ │ ├── get_period_vars.sql │ │ ├── get_seasonality.sql │ │ └── get_time_bucket.sql │ │ ├── test_dimension_anomalies.sql │ │ ├── test_event_freshness_anomalies.sql │ │ ├── test_exposure_schema_validity.sql │ │ ├── test_freshness_anomalies.sql │ │ ├── test_json_schema.sql │ │ ├── test_schema_changes.sql │ │ ├── test_schema_changes_from_baseline.sql │ │ ├── test_table_anomalies.sql │ │ ├── test_unstructured_data_validation.sql │ │ ├── test_utils │ │ ├── clean_elementary_test_tables.sql │ │ ├── clean_up_tables.sql │ │ ├── collect_column_metrics.sql │ │ ├── collect_table_metrics.sql │ │ ├── compile_py_code.sql │ │ ├── create_elementary_test_table.sql │ │ ├── create_model_baseline_table.sql │ │ ├── find_normalized_data_type_for_column.sql │ │ ├── get_anomaly_query.sql │ │ ├── get_elementary_test_table.sql │ │ ├── get_elementary_test_table_name.sql │ │ ├── get_elementary_tests_schema.sql │ │ ├── get_model_graph_node.sql │ │ ├── get_model_relation_for_test.sql │ │ ├── get_test_execution_id.sql │ │ ├── 
get_test_type.sql │ │ ├── get_test_unique_id.sql │ │ ├── run_python.sql │ │ └── validate_unique_metric_names.sql │ │ └── test_volume_anomalies.sql ├── materializations │ └── non_dbt.sql └── utils │ ├── command_type_utils.sql │ ├── common_test_configs.sql │ ├── cross_db_utils │ ├── can_query_relation.sql │ ├── concat.sql │ ├── contains.sql │ ├── current_timestamp.sql │ ├── date_trunc.sql │ ├── dateadd.sql │ ├── datediff.sql │ ├── day_of_week.sql │ ├── generate_elementary_profile_args.sql │ ├── generate_surrogate_key.sql │ ├── get_profile_creation_query.sql │ ├── hour_of_day.sql │ ├── hour_of_week.sql │ ├── incremental_strategy.sql │ ├── lag.sql │ ├── multi_value_in.sql │ ├── quote_column.sql │ ├── safe_cast.sql │ ├── schema_exists.sql │ ├── sql_union_distinct.sql │ ├── table_type.sql │ ├── target_database.sql │ ├── time_trunc.sql │ ├── timeadd.sql │ ├── timediff.sql │ └── to_char.sql │ ├── data_types │ ├── cast_column.sql │ ├── data_size.sql │ ├── data_type.sql │ ├── data_type_list.sql │ ├── get_column_data_type.sql │ ├── get_normalized_data_type.sql │ ├── is_column_timestamp.sql │ ├── normalize_data_type.sql │ ├── null_as.sql │ ├── to_primitive.sql │ └── try_cast_column_to_timestamp.sql │ ├── dict_utils │ ├── insensitive_get_dict_value.sql │ ├── safe_get_with_default_value.sql │ └── undefined_dict_keys_to_none.sql │ ├── graph │ ├── cache.sql │ ├── column_exists_in_relation.sql │ ├── get_compiled_code.sql │ ├── get_elementary_config_from_node.sql │ ├── get_elementary_relation.sql │ ├── get_model_database_and_schema_from_test_node.sql │ ├── get_node.sql │ ├── get_node_by_name.sql │ ├── get_node_execution_id.sql │ ├── get_nodes_by_unique_ids.sql │ ├── get_nodes_from_graph.sql │ ├── get_package_database_and_schema.sql │ ├── get_parent_model_unique_ids_from_test_node.sql │ ├── get_relation_from_node.sql │ ├── get_relevant_databases.sql │ ├── get_rendered_ref.sql │ ├── get_result_node.sql │ ├── get_run_result_dict.sql │ ├── get_table_name_from_node.sql │ ├── 
is_ephemeral_model.sql │ ├── is_incremental_model.sql │ └── set_cache.sql │ ├── list_utils │ ├── filter_none_and_sort.sql │ ├── join_list.sql │ ├── lists_intersection.sql │ ├── strings_list_to_tuple.sql │ └── union_lists.sql │ ├── log_macro_results.sql │ ├── missing_elementary_models.sql │ ├── percent_query.sql │ ├── run_queries │ ├── agate_to_dicts.sql │ ├── agate_to_json.sql │ ├── render_run_query.sql │ ├── result_column_to_list.sql │ ├── result_value.sql │ ├── run_query.sql │ └── union_macro_queries.sql │ ├── sql_utils │ ├── escape_select.sql │ ├── list_concat_with_separator.sql │ ├── min_max.sql │ └── to_sql_list.sql │ └── table_operations │ ├── create_intermediate_relation.sql │ ├── create_or_replace.sql │ ├── create_table_like.sql │ ├── create_temp_table.sql │ ├── delete_and_insert.sql │ ├── delete_if_incremental.sql │ ├── fully_drop_relation.sql │ ├── get_column_in_relation.sql │ ├── get_columns_and_types.sql │ ├── get_relation_max_length.sql │ ├── get_row_count.sql │ ├── get_timestamped_table_suffix.sql │ ├── has_temp_table_support.sql │ ├── insert_as_select.sql │ ├── insert_rows.sql │ ├── make_temp_relation.sql │ ├── merge_sql.sql │ ├── relation_exists.sql │ ├── remove_rows.sql │ ├── replace_table_data.sql │ └── table_name_with_suffix.sql ├── models ├── alerts_views.yml ├── dbt_artifacts.yml ├── edr │ ├── alerts │ │ ├── alerts_anomaly_detection.sql │ │ ├── alerts_dbt_models.sql │ │ ├── alerts_dbt_source_freshness.sql │ │ ├── alerts_dbt_tests.sql │ │ └── alerts_schema_changes.sql │ ├── data_monitoring │ │ ├── anomaly_detection │ │ │ ├── anomaly_threshold_sensitivity.sql │ │ │ └── metrics_anomaly_score.sql │ │ ├── data_monitoring │ │ │ └── data_monitoring_metrics.sql │ │ └── schema_changes │ │ │ └── schema_columns_snapshot.sql │ ├── dbt_artifacts │ │ ├── dbt_artifacts_hashes.sql │ │ ├── dbt_columns.sql │ │ ├── dbt_exposures.sql │ │ ├── dbt_groups.sql │ │ ├── dbt_invocations.sql │ │ ├── dbt_metrics.sql │ │ ├── dbt_models.sql │ │ ├── dbt_run_results.sql │ │ 
├── dbt_seeds.sql │ │ ├── dbt_snapshots.sql │ │ ├── dbt_sources.sql │ │ └── dbt_tests.sql │ ├── run_results │ │ ├── dbt_source_freshness_results.sql │ │ ├── elementary_test_results.sql │ │ ├── job_run_results.sql │ │ ├── model_run_results.sql │ │ ├── seed_run_results.sql │ │ ├── snapshot_run_results.sql │ │ └── test_result_rows.sql │ └── system │ │ ├── metadata.sql │ │ └── monitors_runs.sql ├── elementary_tests.yml └── run_results.yml ├── packages.yml ├── snapshots └── .gitkeep └── tests └── .gitkeep /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | W503 4 | E203 5 | E501 6 | E402 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/remind-docs-and-tests.yml: -------------------------------------------------------------------------------- 1 | name: Remind docs and tests 2 | on: 3 | pull_request_target: 4 | branches: ["master"] 5 | jobs: 6 | run: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: wow-actions/auto-comment@v1 10 | with: 11 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 12 | pullRequestOpened: | 13 | 👋 @{{ author }} 14 | Thank you for raising your pull request. 15 | Please make sure to add tests and document all user-facing changes. 16 | You can do this by editing the `docs` files in the [`elementary`](https://github.com/elementary-data/elementary) repository. 
17 | -------------------------------------------------------------------------------- /.github/workflows/run-precommit.yml: -------------------------------------------------------------------------------- 1 | name: Run pre-commit hooks 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | 6 | jobs: 7 | code-quality: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout Elementary 11 | uses: actions/checkout@v4 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v4.3.0 15 | with: 16 | python-version: "3.8" 17 | 18 | - name: Install dev requirements 19 | run: pip install -r dev-requirements.txt 20 | 21 | - name: Run pre-commit hooks 22 | run: pre-commit run --all-files --show-diff-on-failure 23 | -------------------------------------------------------------------------------- /.github/workflows/test-all-warehouses-dbt-pre-releases.yml: -------------------------------------------------------------------------------- 1 | name: Test all warehouse platforms on dbt pre-releases 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | test: 7 | uses: ./.github/workflows/test-all-warehouses.yml 8 | secrets: inherit 9 | with: 10 | dbt-version: latest_pre 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dbt_packages/ 3 | logs/ 4 | scripts/ 5 | 6 | .idea 7 | .DS_Store 8 | 9 | edr.log 10 | integration_tests/data 11 | edr_target 12 | 13 | venv/ 14 | elementary*.html 15 | elementary*.json 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # Python notebooks 23 | .ipynb 24 | 25 | # vscode 26 | .vscode/ 27 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: "^dbt_packages$" 2 | 3 | repos: 4 | - repo: 
https://github.com/psf/black 5 | rev: 22.12.0 6 | hooks: 7 | - id: black 8 | 9 | - repo: https://github.com/pycqa/isort 10 | rev: 5.12.0 11 | hooks: 12 | - id: isort 13 | args: ["--profile", "black"] 14 | 15 | - repo: https://github.com/pycqa/flake8 16 | rev: 6.0.0 17 | hooks: 18 | - id: flake8 19 | 20 | - repo: https://github.com/pre-commit/mirrors-prettier 21 | rev: "v3.0.0" 22 | hooks: 23 | - id: prettier 24 | 25 | - repo: https://github.com/crate-ci/typos 26 | rev: v1.16.6 27 | hooks: 28 | - id: typos 29 | 30 | - repo: local 31 | hooks: 32 | - id: no_commit 33 | name: Check for NO_COMMIT marker 34 | entry: bash -c "git diff --cached -U0 | (! grep NO_COMMIT)" 35 | language: system 36 | -------------------------------------------------------------------------------- /analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/analyses/.gitkeep -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/data/.gitkeep -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "elementary" 2 | version: "0.18.3" 3 | 4 | require-dbt-version: [">=1.0.0", "<2.0.0"] 5 | 6 | config-version: 2 7 | profile: "elementary" 8 | 9 | model-paths: ["models"] 10 | analysis-paths: ["analyses"] 11 | test-paths: ["tests"] 12 | seed-paths: ["data"] 13 | macro-paths: ["macros"] 14 | snapshot-paths: ["snapshots"] 15 | 16 | target-path: "target" # directory which will store compiled SQL files 17 | clean-targets: # directories to be removed by `dbt clean` 18 | - "target" 19 | - 
"dbt_packages" 20 | - "dbt_modules" 21 | 22 | on-run-start: 23 | - "{{ elementary.on_run_start() }}" 24 | on-run-end: 25 | - "{{ elementary.on_run_end() }}" 26 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pre-commit 2 | 3 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | # dbt-data-reliability dbt Package Tests 2 | 3 | ## Usage 4 | 5 | 1. Start a Postgres instance on your machine using Docker: 6 | 7 | ```shell 8 | docker-compose up -d 9 | ``` 10 | 11 | 2. Add the following profile to your `profiles.yml`: 12 | 13 | ```shell 14 | elementary_tests: 15 | target: postgres 16 | outputs: 17 | postgres: 18 | type: postgres 19 | host: 127.0.0.1 20 | port: 5432 21 | user: admin 22 | password: admin 23 | dbname: postgres 24 | schema: edr 25 | threads: 32 26 | ``` 27 | 28 | 3. Install tests' requirements. 29 | 30 | ```shell 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | 4. Install elementary-data 35 | 36 | `elementary-data` is required for testing. Install specific version if latest doesn't fit your needs. 37 | 38 | ```shell 39 | pip install elementary-data 40 | ``` 41 | 42 | 5. Run the tests. 43 | 44 | ```shell 45 | pytest tests -vvv -n8 46 | ``` 47 | 48 | ### Web Interface 49 | 50 | You can browse the database by visiting http://localhost:5433 in your browser. 51 | The credentials are: 52 | 53 | - **Email**: admin@admin.com 54 | - **Password**: admin 55 | 56 | It is also recommended to set the search path to your Elementary schema by running: `SET search_path = edr_elementary`. 57 | That will allow you to do `SELECT * FROM dbt_models` rather than `SELECT * FROM edr_elementary.dbt_models`. 
58 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | models/tmp 3 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/dbt_project/data/.gitkeep -------------------------------------------------------------------------------- /integration_tests/dbt_project/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "elementary_tests" 2 | version: "1.0.0" 3 | config-version: 2 4 | profile: "elementary_tests" 5 | 6 | model-paths: ["models"] 7 | analysis-paths: ["analyses"] 8 | test-paths: ["tests"] 9 | seed-paths: ["data", "seeds"] 10 | macro-paths: ["macros"] 11 | snapshot-paths: ["snapshots"] 12 | 13 | target-path: "target" # directory which will store compiled SQL files 14 | clean-targets: # directories to be removed by `dbt clean` 15 | - "target" 16 | - "dbt_packages" 17 | - "dbt_modules" 18 | 19 | vars: 20 | debug_logs: "{{ env_var('DBT_EDR_DEBUG', False) }}" 21 | mute_ensure_materialization_override: true 22 | 23 | models: 24 | elementary_tests: 25 | tmp: 26 | +materialized: table 27 | 28 | elementary: 29 | +schema: elementary 30 | +enabled: "{{ var('elementary_enabled', True) }}" 31 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/debug.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export DBT_EDR_DEBUG=1 4 | export DBT_MACRO_DEBUGGING=1 5 | 6 | -------------------------------------------------------------------------------- 
/integration_tests/dbt_project/macros/clear_env.sql: -------------------------------------------------------------------------------- 1 | {% macro clear_env() %} 2 | {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} 3 | {% do elementary_tests.edr_drop_schema(database_name, schema_name) %} 4 | {% do elementary_tests.edr_drop_schema(elementary.target_database(), generate_schema_name()) %} 5 | {% endmacro %} 6 | 7 | {% macro edr_drop_schema(database_name, schema_name) %} 8 | {% do return(adapter.dispatch('edr_drop_schema', 'elementary_tests')(database_name, schema_name)) %} 9 | {% endmacro %} 10 | 11 | {% macro default__edr_drop_schema(database_name, schema_name) %} 12 | {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name) %} 13 | {% do dbt.drop_schema(schema_relation) %} 14 | {% do adapter.commit() %} 15 | {% endmacro %} 16 | 17 | {% macro clickhouse__edr_drop_schema(database_name, schema_name) %} 18 | {% do run_query("DROP DATABASE IF EXISTS " ~ schema_name) %} 19 | {% do adapter.commit() %} 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/macros/dbg.sql: -------------------------------------------------------------------------------- 1 | {% macro dbg() %} 2 | {% do debug() %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/macros/generate_schema_name.sql: -------------------------------------------------------------------------------- 1 | {% macro generate_schema_name(custom_schema_name, node) -%} 2 | {%- set schema_name = target.schema -%} 3 | {% if custom_schema_name %} 4 | {% set schema_name = "{}_{}".format(schema_name, custom_schema_name) %} 5 | {% endif %} 6 | 7 | {% set schema_name_suffix_by_var = var('schema_name_suffix', '') %} 8 | {% if schema_name_suffix_by_var %} 9 | {% set schema_name = schema_name + 
schema_name_suffix_by_var %} 10 | {% endif %} 11 | 12 | {% do return(schema_name) %} 13 | {%- endmacro %} 14 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/macros/get_anomaly_config.sql: -------------------------------------------------------------------------------- 1 | {% macro get_anomaly_config(model_config, config) %} 2 | {{ return(adapter.dispatch('get_anomaly_config', 'elementary')(model_config, config)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_anomaly_config(model_config, config) %} 6 | {% set mock_model = { 7 | "alias": "mock_model", 8 | "config": { 9 | "elementary": model_config 10 | } 11 | } %} 12 | {# trick elementary into thinking this is the running model #} 13 | {% do context.update({ 14 | "model": { 15 | "depends_on": { 16 | "nodes": ["id"] 17 | } 18 | }, 19 | "graph": { 20 | "nodes": { 21 | "id": mock_model 22 | } 23 | } 24 | }) %} 25 | {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("db", "schema", "mock_model"), **config)[0]) %} 26 | {% endmacro %} 27 | 28 | {% macro clickhouse__get_anomaly_config(model_config, config) %} 29 | {% set mock_model = { 30 | "alias": "mock_model", 31 | "config": { 32 | "elementary": model_config 33 | } 34 | } %} 35 | {# trick elementary into thinking this is the running model #} 36 | {% do context.update({ 37 | "model": { 38 | "depends_on": { 39 | "nodes": ["id"] 40 | } 41 | }, 42 | "graph": { 43 | "nodes": { 44 | "id": mock_model 45 | } 46 | } 47 | }) %} 48 | {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("schema", "schema", "mock_model"), **config)[0]) %} 49 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_project/macros/materializations.sql: -------------------------------------------------------------------------------- 1 | {% materialization test, default %} 2 | {% if 
var('enable_elementary_test_materialization', false) %} 3 | {% do return(elementary.materialization_test_default.call_macro()) %} 4 | {% else %} 5 | {% do return(dbt.materialization_test_default.call_macro()) %} 6 | {% endif %} 7 | {% endmaterialization %} 8 | 9 | {% materialization test, adapter="snowflake" %} 10 | {% if var('enable_elementary_test_materialization', false) %} 11 | {% do return(elementary.materialization_test_snowflake.call_macro()) %} 12 | {% else %} 13 | {% if dbt.materialization_test_snowflake %} 14 | {% do return(dbt.materialization_test_snowflake.call_macro()) %} 15 | {% else %} 16 | {% do return(dbt.materialization_test_default.call_macro()) %} 17 | {% endif %} 18 | {% endif %} 19 | {% endmaterialization %} 20 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/macros/python.sql: -------------------------------------------------------------------------------- 1 | {% macro python_mock_test(args) %} 2 | def test(model_df, ref, session): 3 | return {{ args.result }} 4 | {% endmacro %} 5 | 6 | {% macro python_return_df(args) %} 7 | def test(model_df, ref, session): 8 | return model_df 9 | {% endmacro %} 10 | 11 | {% macro python_return_empty_df(args) %} 12 | def test(model_df, ref, session): 13 | col_name = model_df.columns[0] 14 | col = model_df[col_name] 15 | return model_df[col == "blablablabla"] 16 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/customers.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('stg_customers') }} 2 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/exposures.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | exposures: 4 | - name: customers 5 | label: CustomersFTW 6 | type: 
dashboard 7 | maturity: high 8 | url: https://bi.tool/dashboards/1 9 | description: > 10 | Did someone say "exponential growth"? 11 | 12 | depends_on: 13 | - ref('customers') 14 | 15 | owner: 16 | name: Callum McData 17 | email: data@jaffleshop.com 18 | meta: 19 | referenced_columns: 20 | - column_name: id 21 | data_type: numeric 22 | node: ref('customers') 23 | 24 | - name: orders 25 | label: Returned Orders 26 | type: dashboard 27 | maturity: high 28 | url: https://bi.tool/dashboards/2 29 | description: > 30 | Did someone say "exponential growth"? 31 | 32 | depends_on: 33 | - ref('orders') 34 | 35 | owner: 36 | name: Callum McData 37 | email: data@jaffleshop.com 38 | meta: 39 | referenced_columns: 40 | - column_name: "order_id" 41 | data_type: "string" 42 | - column_name: "ZOMG" 43 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/metrics/python/metrics_python_table.py: -------------------------------------------------------------------------------- 1 | def model(dbt, session): 2 | dbt.config(materialized="table") 3 | return dbt.source("test_data", "metrics_seed3") 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/metrics/sql/metrics_incremental.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized="incremental") }} 2 | 3 | select * from {{ source("test_data", "metrics_seed2") }} -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/metrics/sql/metrics_table.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized="table") }} 2 | 3 | select * from {{ source("test_data", "metrics_seed1") }} 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/metrics/sql/metrics_view.sql: 
-------------------------------------------------------------------------------- 1 | {{ config(materialized="view") }} 2 | 3 | select * from {{ source("test_data", "metrics_seed1") }} 4 | union all 5 | select * from {{ source("test_data", "metrics_seed2") }} 6 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/one.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table', 4 | tags=var('one_tags', []), 5 | meta={'owner': var('one_owner', 'egk')} 6 | ) 7 | }} 8 | 9 | SELECT 1 AS one 10 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/orders.sql: -------------------------------------------------------------------------------- 1 | select order_id, customer_id, amount from {{ ref('stg_orders') }} 2 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: customers 5 | description: This table has basic information about a customer, as well as some derived facts based on a customer's orders 6 | tests: 7 | - elementary.exposure_schema_validity: 8 | tags: [exposure_customers] 9 | 10 | columns: 11 | - name: id 12 | description: This is a unique identifier for a customer 13 | 14 | - name: name 15 | data_type: string 16 | description: Customer's name. 
17 | 18 | - name: orders 19 | description: This table has basic information about orders, as well as some derived facts based on payments 20 | 21 | tests: 22 | - elementary.exposure_schema_validity: 23 | tags: [exposure_orders] 24 | 25 | columns: 26 | - name: order_id 27 | description: This is a unique identifier for an order 28 | 29 | - name: customer_id 30 | description: Foreign key to the customers table 31 | 32 | - name: order_date 33 | description: Date (UTC) that the order was placed 34 | 35 | - name: amount 36 | description: Total amount (AUD) of the order 37 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/test_data.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: test_data 5 | schema: "{{ target.schema + var('schema_name_suffix', '') }}" 6 | tables: 7 | - name: metrics_seed1 8 | - name: metrics_seed2 9 | - name: metrics_seed3 10 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/models/tmp/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/dbt_project/models/tmp/.gitkeep -------------------------------------------------------------------------------- /integration_tests/dbt_project/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../../ 3 | - package: dbt-labs/dbt_utils 4 | version: | 5 | {%- set minor_to_utils_range_map = { 6 | "0": [">=0.8.0", "<0.9.0"], 7 | "1": [">=0.8.0", "<0.9.0"], 8 | "2": [">=0.8.0", "<1.0.0"], 9 | } -%} 10 | {{- minor_to_utils_range_map.get(dbt_version.split('.')[1], [">=0.8.0", "<2.0.0"]) -}} 11 | -------------------------------------------------------------------------------- 
/integration_tests/dbt_project/seeds/stg_customers.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Erik 3 | 2,Zaadi 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/seeds/stg_orders.csv: -------------------------------------------------------------------------------- 1 | order_id,customer_id,amount 2 | 1,1,42 3 | 2,1,42 4 | 3,1,42 5 | 4,1,42 6 | 5,2,42 7 | -------------------------------------------------------------------------------- /integration_tests/dbt_project/selectors.yml: -------------------------------------------------------------------------------- 1 | selectors: 2 | - name: init 3 | definition: one 4 | 5 | - name: one 6 | definition: one 7 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/README.md: -------------------------------------------------------------------------------- 1 | # dbt-data-reliability dbt Package Tests 2 | 3 | ## Usage 4 | 5 | 1. Start a Postgres instance on your machine using Docker: 6 | 7 | ```shell 8 | docker-compose up -d 9 | ``` 10 | 11 | 2. Add the following profile to your `profiles.yml`: 12 | 13 | ```shell 14 | elementary_tests: 15 | target: postgres 16 | outputs: 17 | postgres: 18 | type: postgres 19 | host: 127.0.0.1 20 | port: 5432 21 | user: admin 22 | password: admin 23 | dbname: postgres 24 | schema: edr 25 | threads: 32 26 | ``` 27 | 28 | 3. Load data into the database: 29 | 30 | ```shell 31 | python generate_data.py 32 | dbt seed 33 | ``` 34 | 35 | 4. Run the tests. 36 | 37 | ```shell 38 | python run_e2e_tests.py 39 | ``` 40 | 41 | ### Web Interface 42 | 43 | You can browse the database by visiting http://localhost:5433 in your browser. 
44 | The credentials are: 45 | 46 | - **Email**: admin@admin.com 47 | - **Password**: admin 48 | 49 | It is also recommended to set the search path to your Elementary schema by running: `SET search_path = edr_elementary`. 50 | That will allow you to do `SELECT * FROM dbt_models` rather than `SELECT * FROM edr_elementary.dbt_models`. 51 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/any_type_column_anomalies_training.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/training/any_type_column_anomalies_training.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/backfill_days_column_anomalies_training.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/training/backfill_days_column_anomalies_training.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/dimension_anomalies_training.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/training/dimension_anomalies_training.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/groups_training.csv: -------------------------------------------------------------------------------- 1 | group_a,group_b,group_c,group_d 2 | Poland,Netherlands,Spain,Ukraine 
3 | Greece,Denmark,Italy,Sweden 4 | Russia,Germany,Ireland,France 5 | Czech Republic,Portugal,Croatia,England 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/numeric_column_anomalies_training.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/training/numeric_column_anomalies_training.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/training/string_column_anomalies_training.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/training/string_column_anomalies_training.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/any_type_column_anomalies_validation.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/validation/any_type_column_anomalies_validation.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/backfill_days_column_anomalies_validation.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/validation/backfill_days_column_anomalies_validation.csv 
-------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/dimension_anomalies_validation.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/validation/dimension_anomalies_validation.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/groups_validation.csv: -------------------------------------------------------------------------------- 1 | group_b,group_c,group_d 2 | Netherlands,Spain,Ukraine 3 | Denmark,Italy,Sweden 4 | Germany,Ireland,France 5 | Portugal,Croatia,England 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/numeric_column_anomalies_validation.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/validation/numeric_column_anomalies_validation.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/stats_players_validation.csv: -------------------------------------------------------------------------------- 1 | key_crosses,red_cards,Player,Team,Goals,shots_on_target,Passes,Shots_without_scoring,Crosses,Tackles,Interceptions,Goals_conceded,Saves_made,Yellow_cards 2 | bla,bla,Darijo Srna,Croatia,2,4,5,7,16,13,4,4,5,1 3 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/data/validation/string_column_anomalies_validation.csv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/integration_tests/deprecated_tests/data/validation/string_column_anomalies_validation.csv -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "elementary_integration_tests" 2 | version: "1.0.0" 3 | config-version: 2 4 | profile: "elementary_tests" 5 | 6 | model-paths: ["models"] 7 | analysis-paths: ["analyses"] 8 | test-paths: ["tests"] 9 | seed-paths: ["data"] 10 | macro-paths: ["macros"] 11 | snapshot-paths: ["snapshots"] 12 | 13 | target-path: "target" # directory which will store compiled SQL files 14 | clean-targets: # directories to be removed by `dbt clean` 15 | - "target" 16 | - "dbt_packages" 17 | - "dbt_modules" 18 | 19 | vars: 20 | days_back: 30 21 | debug_logs: "{{ env_var('DBT_EDR_DEBUG', False) }}" 22 | custom_run_started_at: "{{ modules.datetime.datetime.utcfromtimestamp(0) }}" 23 | mute_ensure_materialization_override: true 24 | 25 | seeds: 26 | +schema: test_seeds 27 | 28 | models: 29 | elementary: 30 | +schema: elementary 31 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/debug.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export DBT_EDR_DEBUG=1 4 | export DBT_MACRO_DEBUGGING=1 5 | 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/asserts.sql: -------------------------------------------------------------------------------- 1 | {% macro assert_value(value, expected_value) %} 2 | {% if value != expected_value %} 3 | {% do elementary.edr_log("FAILED: value " ~ value ~ " does not equal to " ~ expected_value) %} 4 | {{ return(1) }} 5 | {% else %} 6 | {% do 
elementary.edr_log("SUCCESS") %} 7 | {{ return(0) }} 8 | {% endif %} 9 | {% endmacro %} 10 | 11 | {% macro assert_str_in_value(str, value) %} 12 | {% if str not in value %} 13 | {% do elementary.edr_log("FAILED: the string " ~ str ~ " was not found in " ~ value) %} 14 | {{ return(1) }} 15 | {% else %} 16 | {% do elementary.edr_log("SUCCESS") %} 17 | {{ return(0) }} 18 | {% endif %} 19 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/utils/table_assertions.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro assert_empty_table(table, context='') %} 3 | {% if table | length > 0 %} 4 | {% do elementary.edr_log(context ~ " FAILED: Table not empty.") %} 5 | {% do table.print_table() %} 6 | {{ return(1) }} 7 | {% endif %} 8 | {% do elementary.edr_log(context ~ " SUCCESS: Table is empty.") %} 9 | {{ return(0) }} 10 | {% endmacro %} 11 | 12 | {% macro assert_table_doesnt_exist(model_name) %} 13 | {% if load_relation(ref(model_name)) is none %} 14 | {% do elementary.edr_log(model_name ~ " SUCCESS: Table doesn't exist.") %} 15 | {{ return(0) }} 16 | {% endif %} 17 | {% do elementary.edr_log(context ~ " FAILED: Table exists.") %} 18 | {{ return(1) }} 19 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/validate_backfill_days.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_backfill_days() %} 2 | {% set alerts_relation = ref('alerts_anomaly_detection') %} 3 | {% set string_column_alerts %} 4 | select column_name 5 | from {{ alerts_relation }} 6 | where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) and upper(table_name) = 'BACKFILL_DAYS_COLUMN_ANOMALIES' 7 | {% endset %} 8 | {% set results = elementary.result_column_to_list(string_column_alerts) %} 9 
| {{ assert_lists_contain_same_items(results, ['min_length']) }} 10 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/validate_config_levels.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_config_levels() %} 2 | {% set alerts_relation = ref('test_alerts_union') %} 3 | 4 | {% set config_levels_validation_query %} 5 | with error_tests as ( 6 | select 7 | table_name, alert_description, 8 | {{ elementary.contains('tags', 'config_levels') }} as is_config_levels_tag 9 | from {{ alerts_relation }} 10 | where status = 'error' 11 | ) 12 | select table_name, alert_description 13 | from error_tests 14 | where is_config_levels_tag = true 15 | {% endset %} 16 | {% set results = elementary.run_query(config_levels_validation_query) %} 17 | {{ assert_empty_table(results) }} 18 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/validate_freshness_anomalies.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_event_freshness_anomalies() %} 2 | {%- set max_bucket_end = elementary.edr_quote(elementary.get_run_started_at().strftime("%Y-%m-%d 00:00:00")) %} 3 | {% set alerts_relation = ref('alerts_anomaly_detection') %} 4 | {% set freshness_validation_query %} 5 | select distinct table_name 6 | from {{ alerts_relation }} 7 | where sub_type = 'event_freshness' and detected_at >= {{elementary.edr_cast_as_timestamp(max_bucket_end) }} 8 | {% endset %} 9 | 10 | {% set results = elementary.result_column_to_list(freshness_validation_query) %} 11 | {{ assert_lists_contain_same_items(results, ['string_column_anomalies', 12 | 'numeric_column_anomalies', 13 | 'string_column_anomalies_training']) }} 14 | {% endmacro %} 15 | 
-------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/validate_seasonal_volume_anomalies.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_seasonal_volume_anomalies() %} 2 | {% set query %} 3 | select test_alias, status 4 | from {{ ref('elementary_test_results') }} 5 | where table_name in ('users_per_day_weekly_seasonal', 'users_per_hour_daily_seasonal') 6 | {% endset %} 7 | {% set results = elementary.run_query(query) %} 8 | {{ assert_lists_contain_same_items(results, [ 9 | ('day_of_week_volume_anomalies_no_seasonality', 'fail'), 10 | ('day_of_week_volume_anomalies_with_seasonality', 'pass'), 11 | ('hour_of_day_volume_anomalies_with_seasonality', 'pass'), 12 | ('hour_of_day_volume_anomalies_no_seasonality', 'fail'), 13 | ('hour_of_week_volume_anomalies_no_seasonality', 'fail'), 14 | ('hour_of_week_volume_anomalies_with_seasonality', 'pass') 15 | ]) }} 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/e2e_tests/validate_table_anomalies.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_table_anomalies() %} 2 | -- no validation data which means table freshness and volume should alert 3 | {% set alerts_relation = ref('alerts_anomaly_detection') %} 4 | {% set freshness_validation_query %} 5 | select distinct table_name 6 | from {{ alerts_relation }} 7 | where status in ('fail', 'warn') and sub_type = 'freshness' 8 | {% endset %} 9 | {% set results = elementary.result_column_to_list(freshness_validation_query) %} 10 | {{ assert_lists_contain_same_items(results, ['string_column_anomalies', 11 | 'numeric_column_anomalies', 12 | 'string_column_anomalies_training']) }} 13 | {% set row_count_validation_query %} 14 | select distinct table_name 15 | from {{ alerts_relation }} 16 | where 
status in ('fail', 'warn') and sub_type = 'row_count' 17 | {% endset %} 18 | {% set results = elementary.result_column_to_list(row_count_validation_query) %} 19 | {{ assert_lists_contain_same_items(results, ['users_per_hour_daily_seasonal', 20 | 'users_per_day_weekly_seasonal', 21 | 'any_type_column_anomalies', 22 | 'numeric_column_anomalies', 23 | 'string_column_anomalies_training']) }} 24 | 25 | {% endmacro %} 26 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/generic_tests/generic_test_on_column.sql: -------------------------------------------------------------------------------- 1 | {%- test generic_test_on_column(model, column_name) -%} 2 | {% set query_with_rows %} 3 | with nothing as (select 1 as num) 4 | select * from nothing where num = 1 5 | {%- endset -%} 6 | {{ query_with_rows }} 7 | {%- endtest -%} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/generic_tests/generic_test_on_model.sql: -------------------------------------------------------------------------------- 1 | {%- test generic_test_on_model(model) -%} 2 | {% set query_with_rows %} 3 | with nothing as (select 1 as num) 4 | select * from nothing where num = 1 5 | {%- endset -%} 6 | {{ query_with_rows }} 7 | {%- endtest -%} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/system/dbg.sql: -------------------------------------------------------------------------------- 1 | {% macro dbg() %} 2 | {% do debug() %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/system/generate_schema_name.sql: -------------------------------------------------------------------------------- 1 | {% macro generate_schema_name(custom_schema_name, node) -%} 2 | {%- set default_schema = target.schema 
-%} 3 | {% if not custom_schema_name %} 4 | {% do return(default_schema) %} 5 | {% endif %} 6 | 7 | {% if node.resource_type == "seed" %} 8 | {% do return(custom_schema_name) %} 9 | {% endif %} 10 | 11 | {% do return("{}_{}".format(default_schema, custom_schema_name)) %} 12 | {%- endmacro %} 13 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/system/materializations.sql: -------------------------------------------------------------------------------- 1 | {% materialization test, default %} 2 | {% do return(elementary.materialization_test_default.call_macro()) %} 3 | {% endmaterialization %} 4 | 5 | {% materialization test, adapter="snowflake" %} 6 | {% do return(elementary.materialization_test_snowflake.call_macro()) %} 7 | {% endmaterialization %} 8 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/system/read_table.sql: -------------------------------------------------------------------------------- 1 | {% macro read_table(table, where=none, column_names=none) %} 2 | {% set query %} 3 | select 4 | {% if column_names %} 5 | {{ elementary.escape_select(column_names) }} 6 | {% else %} 7 | * 8 | {% endif %} 9 | from {{ ref(table) }} 10 | {% if where %} 11 | where {{ where }} 12 | {% endif %} 13 | {% endset %} 14 | 15 | {% set results = elementary.run_query(query) %} 16 | {% set results_json = elementary.agate_to_json(results) %} 17 | {% do elementary.edr_log(results_json) %} 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/system/return_config_var.sql: -------------------------------------------------------------------------------- 1 | {# Logging the wanted config var as an elementary log (using elementary.edr_log) #} 2 | {# The dbtRunner catch this log when executed with run_operation #} 3 | {# This is used for 
accessing the integration tests vars #} 4 | {% macro return_config_var(var_name) %} 5 | {{ elementary.edr_log(elementary.get_config_var(var_name)) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql: -------------------------------------------------------------------------------- 1 | {% macro test_adapter_specific_macros_have_default_implementation() %} 2 | {% set no_default_macros = [] %} 3 | {% set elementary_macros = elementary.keys() %} 4 | {% for macro in elementary_macros %} 5 | {% set parts = macro.split("__") %} 6 | {% if parts | length == 2 %} 7 | {% set adapter, macro_name = parts %} 8 | {% if macro_name not in no_default_macros and "default__{}".format(macro_name) not in elementary_macros %} 9 | {% do no_default_macros.append(macro_name) %} 10 | {% endif %} 11 | {% endif %} 12 | {% endfor %} 13 | {{ assert_lists_contain_same_items(no_default_macros, [], "no_default_macros") }} 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/any_type_column_anomalies.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('any_type_column_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('any_type_column_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | occurred_at, 25 | null_count_str, 26 | null_percent_str, 27 | null_count_float, 28 | null_percent_float, 29 | null_count_int, 30 | null_percent_int, 31 | null_count_bool, 32 | 
null_percent_bool 33 | from source 34 | ) 35 | 36 | select * from final 37 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/backfill_days_column_anomalies.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('backfill_days_column_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('backfill_days_column_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | occurred_at, 25 | min_length 26 | from source 27 | ) 28 | 29 | select * from final 30 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/config_levels_project.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('any_type_column_anomalies_validation') }} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/config_levels_test_and_model.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref('any_type_column_anomalies_validation') }} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/copy_numeric_column_anomalies.sql: -------------------------------------------------------------------------------- 1 | select * from {{ ref("numeric_column_anomalies") }} 2 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/dimension_anomalies.sql: 
-------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('dimension_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('dimension_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | platform, 25 | version, 26 | user_id 27 | from source 28 | ) 29 | 30 | select * from final 31 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/ephemeral_model.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='ephemeral' 4 | ) 5 | }} 6 | 7 | select * from {{ ref('any_type_column_anomalies_training') }} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/error_model.sql: -------------------------------------------------------------------------------- 1 | select 'a's as string 2 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/groups.sql: -------------------------------------------------------------------------------- 1 | {% if var("stage") == "training" %} 2 | select * from {{ ref('groups_training') }} 3 | {% elif var("stage") == "validation" %} 4 | select * from {{ ref('groups_validation') }} 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/nested/models/tree/nested.sql: -------------------------------------------------------------------------------- 1 | select 1 as one 2 | 
-------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/no_timestamp_anomalies.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('any_type_column_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('any_type_column_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | occurred_at, 25 | null_count_str, 26 | null_percent_str, 27 | null_count_float, 28 | null_percent_float, 29 | null_count_int, 30 | null_percent_int, 31 | null_count_bool, 32 | null_percent_bool 33 | from source 34 | ) 35 | 36 | select * from final 37 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/non_dbt_model.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='non_dbt') }} 2 | SELECT 1 3 | -- depends_on: {{ ref('one') }} -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/numeric_column_anomalies.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('numeric_column_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('numeric_column_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | 
occurred_at, 25 | min, 26 | max, 27 | zero_count, 28 | zero_percent, 29 | average, 30 | standard_deviation, 31 | variance, 32 | sum 33 | from source 34 | ) 35 | 36 | select * from final 37 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/one.sql: -------------------------------------------------------------------------------- 1 | select 1 as one 2 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/stats_players.sql: -------------------------------------------------------------------------------- 1 | {% if var("stage") == "training" %} 2 | select * from {{ ref('stats_players_training') }} 3 | {% elif var("stage") == "validation" %} 4 | select * from {{ ref('stats_players_validation') }} 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/stats_team.sql: -------------------------------------------------------------------------------- 1 | {% if var("stage") == "training" %} 2 | select * from {{ ref('stats_team_training') }} 3 | {% elif var("stage") == "validation" %} 4 | select * from {{ ref('stats_team_validation') }} 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/string_column_anomalies.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ ref('string_column_anomalies_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ ref('string_column_anomalies_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | 
updated_at, 24 | occurred_at, 25 | min_length, 26 | max_length, 27 | average_length, 28 | missing_count, 29 | missing_percent 30 | from source 31 | ) 32 | 33 | select * from final 34 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/test_alerts_union.sql: -------------------------------------------------------------------------------- 1 | with dbt as ( 2 | select * from {{ ref('alerts_dbt_tests') }} 3 | ), 4 | {%- if target.type != 'databricks' %} 5 | schema_changes as ( 6 | select * from {{ ref('alerts_schema_changes') }} 7 | ), 8 | {%- endif %} 9 | anomalies as ( 10 | select * from {{ ref('alerts_anomaly_detection') }} 11 | ) 12 | select * from dbt 13 | union all 14 | select * from anomalies 15 | {%- if target.type != 'databricks' %} 16 | union all 17 | select * from schema_changes 18 | {%- endif %} 19 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/users_per_day_weekly_seasonal.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * from {{ source('training', 'users_per_day_weekly_seasonal_training') }} 3 | ), 4 | 5 | {% if var("stage") == "validation" %} 6 | validation as ( 7 | select * from {{ source('validation', 'users_per_day_weekly_seasonal_validation') }} 8 | ), 9 | 10 | source as ( 11 | select * from training 12 | union all 13 | select * from validation 14 | ), 15 | {% else %} 16 | source as ( 17 | select * from training 18 | ), 19 | {% endif %} 20 | 21 | final as ( 22 | select 23 | updated_at, 24 | user_id 25 | from source 26 | ) 27 | 28 | select * from final 29 | -------------------------------------------------------------------------------- /integration_tests/deprecated_tests/models/users_per_hour_daily_seasonal.sql: -------------------------------------------------------------------------------- 1 | with training as ( 2 | select * 
packages:
  # The package under test (repository root).
  - local: ../../
  - package: dbt-labs/dbt_utils
    # Pick a dbt_utils version range compatible with the running dbt minor
    # version; unknown minors fall back to the widest supported range.
    version: |
      {%- set minor_to_utils_range_map = {
        "0": [">=0.8.0", "<0.9.0"],
        "1": [">=0.8.0", "<0.9.0"],
        "2": [">=0.8.0", "<1.0.0"],
      } -%}
      {{- minor_to_utils_range_map.get(dbt_version.split('.')[1], [">=0.8.0", "<2.0.0"]) -}}
-- Singular test exercising two refs: fails (returns rows) when either the
-- null_count_int metric or the min metric drops below 100 in its model.
with min_len_issues as (
    select null_count_int as min_issue from {{ ref('any_type_column_anomalies') }} where null_count_int < 100
),

min_issues as (
    select min as min_issue from {{ ref('numeric_column_anomalies') }} where min < 100
),

all_issues as (
    select * from min_len_issues
    union all
    select * from min_issues
)

select * from all_issues
# Trino Iceberg connector for integration tests, backed by the Hive
# metastore container and MinIO as S3-compatible object storage.
connector.name=iceberg
hive.metastore.uri=thrift://hive-metastore:9083
hive.s3.endpoint=http://minio:9000
hive.s3.path-style-access=true
hive.s3.aws-access-key=minio
hive.s3.aws-secret-key=minio123
# Disable metastore caching so schema changes are visible immediately in tests.
hive.metastore-cache-ttl=0s
hive.metastore-refresh-interval=5s
hive.metastore-timeout=60s
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def generate_dates(base_date, step=None, days_back=31):
    """Walk backwards from base_date in `step` increments.

    Returns the timestamps visited, newest first, stopping once `days_back`
    days have been covered (the lower bound itself is excluded).
    """
    if step is None:
        step = timedelta(days=1)
    # A plain `date` cannot advance by a sub-day step (date - timedelta(hours=x)
    # stays on the same day), which would loop forever; promote to midnight.
    if type(base_date) is date and step.days == 0:
        base_date = datetime.combine(base_date, time.min)

    lower_bound = base_date - timedelta(days=days_back)
    generated = []
    current = base_date
    while current > lower_bound:
        generated.append(current)
        current -= step
    return generated
class DbtDataSeeder:
    """Seeds test data into the warehouse by writing a temporary CSV seed file
    and invoking `dbt seed` on it through the provided dbt runner."""

    def __init__(
        self,
        dbt_runner: "BaseDbtRunner",
        dbt_project_path: Path,
        seeds_dir_path: Path,
    ):
        self.dbt_runner = dbt_runner
        self.dbt_project_path = dbt_project_path
        self.seeds_dir_path = seeds_dir_path

    def seed(self, data: List[dict], table_name: str):
        """Write `data` to `<seeds_dir>/<table_name>.csv`, run `dbt seed` on it
        with full refresh, and always remove the CSV afterwards.

        Raises:
            ValueError: if `data` is empty — the CSV header is derived from the
                first row, so there is nothing to seed. (Previously this failed
                with an opaque IndexError after the file was already created.)
        """
        if not data:
            raise ValueError("Cannot seed an empty data set.")
        seed_path = self.seeds_dir_path.joinpath(f"{table_name}.csv")
        try:
            with seed_path.open("w") as seed_file:
                relative_seed_path = seed_path.relative_to(self.dbt_project_path)
                writer = csv.DictWriter(seed_file, fieldnames=data[0].keys())
                writer.writeheader()
                writer.writerows(data)
                # Flush so the runner's `dbt seed` subprocess sees the full file.
                seed_file.flush()
                self.dbt_runner.seed(select=str(relative_seed_path), full_refresh=True)
        finally:
            # The seed file is temporary; remove it even if seeding fails.
            seed_path.unlink()
def get_logger(logger_name):
    """Return a DEBUG-level logger named `logger_name` that logs to stdout.

    Idempotent: the original implementation appended a new StreamHandler on
    every call, so repeated calls for the same logger produced duplicated log
    lines. We now only attach a handler if the logger has none yet.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(
            logging.Formatter("%(asctime)s — %(levelname)s — %(message)s")
        )
        logger.addHandler(handler)
    return logger
MIN_LENGTH = 3
MAX_LENGTH = 5
SCHEMA = {"type": "string", "minLength": MIN_LENGTH, "maxLength": MAX_LENGTH}
COLUMN_NAME = "jsonschema_column"
TEST_NAME = "elementary.json_schema"

# BigQuery also supports JSON schema tests, but the Python tests are currently flaky.
SUPPORTED_TARGETS = ["snowflake"]


@pytest.mark.requires_dbt_version("1.3.0")
class TestJsonschema:
    """Exercises the elementary.json_schema test against min/max string-length schemas."""

    @staticmethod
    def _schema_test_status(dbt_project, test_id, value):
        # Run the json_schema test against a single-row table holding `value`
        # and return the resulting dbt test status string.
        result = dbt_project.test(
            test_id,
            TEST_NAME,
            dict(column_name=COLUMN_NAME, **SCHEMA),
            data=[{COLUMN_NAME: value}],
        )
        return result["status"]

    @pytest.mark.only_on_targets(SUPPORTED_TARGETS)
    def test_valid(self, test_id: str, dbt_project: DbtProject):
        # A string exactly at the minimum length satisfies the schema.
        valid_value = json.dumps("*" * MIN_LENGTH)
        assert self._schema_test_status(dbt_project, test_id, valid_value) == "pass"

    @pytest.mark.only_on_targets(SUPPORTED_TARGETS)
    def test_invalid(self, test_id: str, dbt_project: DbtProject):
        # One character short of the minimum length violates the schema.
        invalid_value = json.dumps("*" * (MIN_LENGTH - 1))
        assert self._schema_test_status(dbt_project, test_id, invalid_value) == "fail"

    @pytest.mark.skip_targets([*SUPPORTED_TARGETS, "bigquery"])
    def test_invalid_target(self, test_id: str, dbt_project: DbtProject):
        # On targets without json_schema support the test is expected to error.
        assert self._schema_test_status(dbt_project, test_id, "") == "error"
SAFE_QUERY_SIZE = 10000


def generate_query(query_size: int) -> str:
    """Build a SELECT statement whose total text length is exactly `query_size`,
    padding the selected string literal with 'A' characters."""
    prefix = "SELECT '"
    suffix = "' as col"
    filler = "A" * (query_size - len(prefix) - len(suffix))
    return prefix + filler + suffix


def read_run_result(dbt_project, test_id):
    """Fetch the dbt_run_results row for the temp model created by this test."""
    rows = dbt_project.read_table(
        "dbt_run_results",
        where=f"unique_id = 'model.elementary_tests.{test_id}'",
    )
    return rows[0]
def test_query_size_safe(test_id: str, dbt_project: DbtProject):
    """Compiled code below the configured max query size is stored untruncated."""
    dbt_project.dbt_runner.vars["disable_run_results"] = False
    model_query = generate_query(SAFE_QUERY_SIZE)
    with dbt_project.create_temp_model_for_existing_table(
        test_id, raw_code=model_query
    ) as model_path:
        dbt_project.dbt_runner.run(select=str(model_path))
        run_result = read_run_result(dbt_project, test_id)
    # No truncation expected: stored length equals the original query length.
    assert len(run_result["compiled_code"]) == SAFE_QUERY_SIZE
COLUMN_NAME = "string_column"


def test_missing_count(dbt_project: DbtProject, test_id: str):
    """The missing_count monitor counts NULLs, whitespace-only strings and
    the literal 'null' (case-insensitive) as missing."""
    missing_values = [None, " ", "null", "NULL"]
    present_values = ["a", "b", "c", " a "]
    rows = [{COLUMN_NAME: value} for value in present_values + missing_values]
    dbt_project.seed(rows, test_id)
    query = (
        f"select {{{{ elementary.missing_count('{COLUMN_NAME}') }}}} "
        f"as missing_count from {{{{ generate_schema_name() }}}}.{test_id}"
    )
    row = dbt_project.run_query(query)[0]
    assert row["missing_count"] == len(missing_values)
{# Renders an `elementary` CLI profiles.yml snippet from a list of parameter
   dicts with "name", "value" and optional "comment" keys. String values are
   double-quoted; comments are appended as YAML `#` comments.
   NOTE(review): output whitespace is significant here — it is the YAML the
   user copies into their profiles file. #}
{% macro cli_profile_from_parameters(parameters) %}
elementary:
  outputs:
    default:
      {% for parameter in parameters -%}
      {%- set key = parameter["name"] -%}
      {%- set value = parameter["value"] -%}
      {%- if value is string -%}
      {%- set value = '"' ~ value ~ '"' -%}
      {%- endif -%}
      {{ key }}: {{ value }}{% if parameter["comment"] %} # {{ parameter["comment"] }}{% endif %}
      {% endfor -%}
{% endmacro %}
{# BigQuery implementation: verifies read access to INFORMATION_SCHEMA.JOBS
   in every relevant project by running a minimal probe query per database.
   Access requires the "roles/bigquery.resourceViewer" role; a failed probe
   surfaces as a query error. #}
{% macro bigquery__validate_query_history_permissions() %}
    {% set relevant_databases = elementary.get_relevant_databases() %}
    {% for relevant_database in relevant_databases %}
        {% do print('\nValidating access to INFORMATION_SCHEMA.JOBS for the project {} datasets - required role "roles/bigquery.resourceViewer"'.format(relevant_database)) %}
        {# Probe the region-scoped JOBS view; target.location supplies the region. #}
        {% set query = "select 1 from {}.region-{}.INFORMATION_SCHEMA.JOBS limit 1" .format(relevant_database, target.location)%}
        {% do elementary.run_query(query) %}
    {% endfor %}
{% endmacro %}
{# BigQuery implementation: runs every permission validation Elementary needs
   (information schema access and query-history access) and prints progress.
   Any missing permission fails the underlying probe query. #}
{% macro bigquery__validate_permissions() %}
    {% do print("\nValidating all required permissions are granted:") %}
    {% do elementary.validate_information_schema_permissions() %}
    {% do elementary.validate_query_history_permissions() %}
    {% do print("\nAll required permissions are granted!") %}
{% endmacro %}
{# Resolves the effective table-level monitors: when explicit `monitors` are
   requested, keep only those that are in the allowed set; otherwise fall
   back to the monitors configured via the `edr_monitors` var. #}
{% macro get_final_table_monitors(monitors=none) %}
    {%- set final_table_monitors = [] %}

    {%- if monitors and monitors | length > 0 %}
        {%- set allowed_table_monitors = elementary.get_allowed_table_monitors() %}
        {%- set final_table_monitors = elementary.lists_intersection(monitors, allowed_table_monitors) %}
    {%- else %}
        {%- set final_table_monitors = elementary.get_default_table_monitors() %}
    {%- endif %}
    {{ return(final_table_monitors) }}
{% endmacro %}
{# Counts "missing" string values: NULLs, strings that trim to empty, and the
   literal string 'null' in any letter case. Returns 0 for an empty relation
   (the coalesce guards the NULL sum). #}
{% macro missing_count(column_name) %}
    coalesce(sum(case when {{ column_name }} is null then 1 when trim({{ column_name }}) = '' then 1 when lower({{ column_name }}) = 'null' then 1 else 0 end), 0)
{% endmacro %}
{# Flattens the available-monitors mapping into a single list containing the
   monitors of every column-level type (keys starting with "column":
   any-type, string, numeric, boolean). Table-level monitors are excluded. #}
{% macro get_available_column_monitors() %}
    {% set available_col_monitors = [] %}
    {% for monitor_type, monitors in elementary.get_available_monitors().items() %}
        {% if monitor_type.startswith("column") %}
            {% do available_col_monitors.extend(monitors) %}
        {% endif %}
    {% endfor %}
    {% do return(available_col_monitors) %}
{% endmacro %}
{# Returns the generated_at timestamp of the model's most recent run recorded
   with full_refresh = true in dbt_run_results, or none if the model was never
   full-refreshed. #}
{% macro get_latest_full_refresh(model_node) %}
    {%- set dbt_run_results_relation = elementary.get_elementary_relation('dbt_run_results') %}
    {% set query %}
        select generated_at from {{ dbt_run_results_relation }}
        where
          unique_id = '{{ model_node.unique_id }}' and
          full_refresh = true
        order by generated_at desc
        limit 1
    {% endset %}
    {% do return(elementary.result_value(query)) %}
{% endmacro %}
last_schema_changes_query_result = elementary.result_value(last_schema_changes_time_query) %} 11 | 12 | {%- if last_schema_changes_query_result %} 13 | {{ return(last_schema_changes_query_result) }} 14 | {%- else %} 15 | {{ return(none) }} 16 | {%- endif %} 17 | {%- endif -%} 18 | {{- return(none) -}} 19 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/data_monitoring/schema_changes/store_schema_snapshot_tables_in_cache.sql: -------------------------------------------------------------------------------- 1 | {% macro store_schema_snapshot_tables_in_cache() %} 2 | {% set schema_snapshots_tables_cache = elementary.get_cache("tables").get("schema_snapshots") %} 3 | {% set schema_snapshots_table = elementary.get_elementary_test_table(elementary.get_elementary_test_table_name(), 'schema_changes') %} 4 | {% if schema_snapshots_table %} 5 | {% do schema_snapshots_tables_cache.append(schema_snapshots_table) %} 6 | {% endif %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/edr/data_monitoring/schema_changes/store_schema_test_results.sql: -------------------------------------------------------------------------------- 1 | {% macro store_schema_test_results(flattened_test, schema_changes_sql) %} 2 | {% set elementary_test_results_rows = [] %} 3 | {% set schema_changes_rows = elementary.agate_to_dicts(elementary.run_query(schema_changes_sql)) %} 4 | {% for schema_changes_row in schema_changes_rows %} 5 | {% do elementary_test_results_rows.append(elementary.get_schema_changes_test_result_row(flattened_test, schema_changes_row, schema_changes_rows)) %} 6 | {% endfor %} 7 | {% do elementary.cache_elementary_test_results_rows(elementary_test_results_rows) %} 8 | {% endmacro %} 9 | 10 | {% macro get_schema_changes_test_result_row(flattened_test, schema_changes_row, schema_changes_rows) %} 11 | {% set elementary_test_row = 
elementary.get_dbt_test_result_row(flattened_test, schema_changes_rows) %} 12 | {% do elementary_test_row.update(schema_changes_row) %} 13 | {% do return(elementary_test_row) %} 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/edr/dbt_artifacts/get_artifact_metadata_hash.sql: -------------------------------------------------------------------------------- 1 | {% macro get_artifact_metadata_hash(artifact) %} 2 | {% if not local_md5 %} 3 | {% do return(none) %} 4 | {% endif %} 5 | 6 | {% set time_excluded_artifact = artifact.copy() %} 7 | {% do time_excluded_artifact.pop("generated_at") %} 8 | {% do return(local_md5(tojson(time_excluded_artifact, sort_keys=true))) %} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/edr/dbt_artifacts/upload_dbt_groups.sql: -------------------------------------------------------------------------------- 1 | {%- macro upload_dbt_groups(should_commit=false, metadata_hashes=none) -%} 2 | {% set relation = elementary.get_elementary_relation('dbt_groups') %} 3 | {% if execute and relation %} 4 | {% set groups = graph.groups.values() | selectattr('resource_type', '==', 'group') %} 5 | {% do elementary.upload_artifacts_to_table(relation, groups, elementary.flatten_group, should_commit=should_commit, metadata_hashes=metadata_hashes) %} 6 | {%- endif -%} 7 | {{- return('') -}} 8 | {%- endmacro -%} 9 | 10 | {% macro get_dbt_groups_empty_table_query() %} 11 | {% set columns = [ 12 | ('unique_id', 'string'), 13 | ('name', 'string'), 14 | ('owner_email', 'string'), 15 | ('owner_name', 'string'), 16 | ('generated_at', 'string'), 17 | ('metadata_hash', 'string'), 18 | ] %} 19 | 20 | {% set dbt_groups_empty_table_query = elementary.empty_table(columns) %} 21 | {{ return(dbt_groups_empty_table_query) }} 22 | {% endmacro %} 23 | 24 | {% macro flatten_group(node_dict) %} 25 | {% set owner_dict = 
elementary.safe_get_with_default(node_dict, 'owner', {}) %} 26 | 27 | {% set flatten_group_metadata_dict = { 28 | 'unique_id': node_dict.get('unique_id'), 29 | 'name': node_dict.get('name'), 30 | 'owner_email': owner_dict.get('email'), 31 | 'owner_name': owner_dict.get('name'), 32 | 'generated_at': elementary.datetime_now_utc_as_string(), 33 | } %} 34 | {% do flatten_group_metadata_dict.update({'metadata_hash': elementary.get_artifact_metadata_hash(flatten_group_metadata_dict)}) %} 35 | {{ return(flatten_group_metadata_dict) }} 36 | {% endmacro %} 37 | -------------------------------------------------------------------------------- /macros/edr/dbt_artifacts/upload_dbt_snapshots.sql: -------------------------------------------------------------------------------- 1 | {%- macro upload_dbt_snapshots(should_commit=false, metadata_hashes=none) -%} 2 | {% set relation = elementary.get_elementary_relation('dbt_snapshots') %} 3 | {% if execute and relation %} 4 | {% set snapshots = graph.nodes.values() | selectattr('resource_type', '==', 'snapshot') %} 5 | {% do elementary.upload_artifacts_to_table(relation, snapshots, elementary.flatten_model, should_commit=should_commit, metadata_hashes=metadata_hashes) %} 6 | {%- endif -%} 7 | {{- return('') -}} 8 | {%- endmacro -%} 9 | -------------------------------------------------------------------------------- /macros/edr/materializations/test/failed_row_count.sql: -------------------------------------------------------------------------------- 1 | {% macro get_failed_row_count(flattened_test) %} 2 | {% set test_result = elementary.get_test_result() %} 3 | {% if config.get("fail_calc").strip() == elementary.get_failed_row_count_calc(flattened_test) %} 4 | {% do elementary.debug_log("Using test failures as failed_rows value.") %} 5 | {% do return(test_result.failures|int) %} 6 | {% endif %} 7 | {% if elementary.did_test_pass(test_result) %} 8 | {% do return(none) %} 9 | {% endif %} 10 | {% set failed_row_count_query = 
elementary.get_failed_row_count_query(flattened_test) %} 11 | {% if failed_row_count_query %} 12 | {% set result_count = elementary.result_value(failed_row_count_query) %} 13 | {% do return(result_count) %} 14 | {% endif %} 15 | {% do return(none) %} 16 | {% endmacro %} 17 | 18 | {% macro get_failed_row_count_query(flattened_test) %} 19 | {% set failed_row_count_calc = elementary.get_failed_row_count_calc(flattened_test) %} 20 | {% if failed_row_count_calc %} 21 | {% set failed_row_count_query = elementary.get_failed_row_count_calc_query(failed_row_count_calc) %} 22 | {% do return(failed_row_count_query) %} 23 | {% endif %} 24 | {% do return(none) %} 25 | {% endmacro %} 26 | 27 | {% macro get_failed_row_count_calc(flattened_test) %} 28 | {% if "failed_row_count_calc" in flattened_test["meta"] %} 29 | {% do return(flattened_test["meta"]["failed_row_count_calc"]) %} 30 | {% endif %} 31 | {% set common_test_config = elementary.get_common_test_config(flattened_test) %} 32 | {% if common_test_config %} 33 | {% do return(common_test_config.get("failed_row_count_calc")) %} 34 | {% endif %} 35 | {% do return(none) %} 36 | {% endmacro %} 37 | 38 | {% macro get_failed_row_count_calc_query(failed_row_count_calc) %} 39 | with results as ( 40 | {{ sql }} 41 | ) 42 | select {{ failed_row_count_calc }} as count from results 43 | {% endmacro %} 44 | -------------------------------------------------------------------------------- /macros/edr/materializations/test/test_result.sql: -------------------------------------------------------------------------------- 1 | {% macro get_test_result() %} 2 | {# 3 | This macro should return a dictionary with the following keys: 4 | - failures - the result of dbts fail_calc 5 | - should_warn 6 | - should_error 7 | #} 8 | {% set result = load_result('main') %} 9 | {% set rows = elementary.agate_to_dicts(result.table) %} 10 | {% do return(rows[0]) %} 11 | {% endmacro %} 12 | 13 | 14 | {% macro did_test_pass(test_result=none) %} 15 | {% if 
test_result is none %} 16 | {% set test_result = elementary.get_test_result() %} 17 | {% endif %} 18 | {% do return(not test_result.should_warn and not test_result.should_error) %} 19 | {% endmacro %} 20 | -------------------------------------------------------------------------------- /macros/edr/metadata_collection/get_columns_by_schemas.sql: -------------------------------------------------------------------------------- 1 | {% macro get_columns_by_schemas(configured_schemas) %} 2 | {%- if configured_schemas | length > 0 -%} 3 | {{ elementary.union_macro_queries(configured_schemas, elementary.get_columns_from_information_schema) }} 4 | {%- else %} 5 | {{ elementary.get_empty_columns_from_information_schema_table() }} 6 | {%- endif %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/edr/metadata_collection/get_columns_in_project.sql: -------------------------------------------------------------------------------- 1 | {% macro get_columns_in_project() %} 2 | {% set configured_schemas = elementary.get_configured_schemas_from_graph() %} 3 | {{ elementary.get_columns_by_schemas(configured_schemas) }} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /macros/edr/metadata_collection/get_metric_properties.sql: -------------------------------------------------------------------------------- 1 | {% macro get_metric_properties( 2 | model_graph_node, 3 | timestamp_column, 4 | where_expression, 5 | time_bucket, 6 | dimensions=none, 7 | freshness_column=none, 8 | event_timestamp_column=none, 9 | collected_by=none 10 | ) %} 11 | {% set timestamp_column = elementary.get_test_argument('timestamp_column', timestamp_column, model_graph_node) %} 12 | {% set where_expression = elementary.get_test_argument('where_expression', where_expression, model_graph_node) %} 13 | {% set time_bucket = elementary.get_time_bucket(time_bucket, model_graph_node) %} 14 | {% set 
freshness_column = elementary.get_test_argument('freshness_column', freshness_column, model_graph_node) %} 15 | {% set event_timestamp_column = elementary.get_test_argument('event_timestamp_column', event_timestamp_column, model_graph_node) %} 16 | {% set dimensions = elementary.get_test_argument('dimensions', dimensions, model_graph_node) %} 17 | {% set metric_props = { 18 | 'timestamp_column': timestamp_column, 19 | 'where_expression': where_expression, 20 | 'time_bucket': time_bucket, 21 | 'freshness_column': freshness_column, 22 | 'event_timestamp_column': event_timestamp_column, 23 | 'dimensions': dimensions 24 | } %} 25 | {% if collected_by %} 26 | {% do metric_props.update({'collected_by': collected_by}) %} 27 | {% endif %} 28 | {% do return(metric_props) %} 29 | {% endmacro %} 30 | -------------------------------------------------------------------------------- /macros/edr/system/configuration/get_configured_databases_from_graph.sql: -------------------------------------------------------------------------------- 1 | {% macro get_configured_databases_from_graph() %} 2 | {% set schema_tuples = elementary.get_configured_schemas_from_graph() %} 3 | {% do return(schema_tuples | map(attribute=0) | unique) %} 4 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/system/configuration/get_configured_schemas_from_graph.sql: -------------------------------------------------------------------------------- 1 | {% macro get_configured_schemas_from_graph() %} 2 | {% set configured_schemas = [] %} 3 | {% set existing_schemas = [] %} 4 | {% if execute %} 5 | {% set root_project = context["project_name"] %} 6 | {% set nodes = elementary.get_nodes_from_graph() %} 7 | {% for node in nodes %} 8 | {% if node.resource_type in ['model', 'source', 'snapshot', 'seed'] and node.package_name == root_project %} 9 | {% set schema_tuple = (node.database, node.schema) %} 10 | {% if schema_tuple not in configured_schemas %} 11 | {% 
do configured_schemas.append(schema_tuple) %} 12 | {% endif %} 13 | {% endif %} 14 | {% endfor %} 15 | 16 | {% for schema_tuple in configured_schemas %} 17 | {% set database_name = schema_tuple[0] %} 18 | {% set schema_name = schema_tuple[1] %} 19 | {% if elementary.schema_exists(database_name, schema_name) %} 20 | {% do existing_schemas.append(schema_tuple) %} 21 | {% endif %} 22 | {% endfor %} 23 | {% endif %} 24 | {{ return(existing_schemas) }} 25 | {% endmacro %} 26 | -------------------------------------------------------------------------------- /macros/edr/system/configuration/is_elementary_enabled.sql: -------------------------------------------------------------------------------- 1 | {% macro is_elementary_enabled() %} 2 | {% do return("elementary" in graph) %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/edr/system/hooks/on_run_end.sql: -------------------------------------------------------------------------------- 1 | {% macro on_run_end() %} 2 | {%- if execute and not elementary.is_docs_command() %} 3 | {% set edr_cli_run = elementary.get_config_var('edr_cli_run') %} 4 | {% if not execute or edr_cli_run %} 5 | {% do return("") %} 6 | {% endif %} 7 | 8 | {% if not elementary.get_config_var('disable_dbt_artifacts_autoupload') %} 9 | {% do elementary.upload_dbt_artifacts() %} 10 | {% endif %} 11 | 12 | {% if not elementary.get_config_var('disable_run_results') %} 13 | {% do elementary.upload_run_results() %} 14 | {% endif %} 15 | 16 | {% if elementary.is_test_command() and not elementary.get_config_var('disable_tests_results') %} 17 | {% do elementary.handle_tests_results() %} 18 | {% endif %} 19 | 20 | {% if elementary.is_freshness_command() and not elementary.get_config_var('disable_freshness_results') %} 21 | {% do elementary.upload_source_freshness() %} 22 | {% endif %} 23 | 24 | {% if not elementary.get_config_var('disable_dbt_invocation_autoupload') %} 25 | {% do 
elementary.upload_dbt_invocation() %} 26 | {% endif %} 27 | 28 | {# 29 | Elementary temp tables are not really temp and should be cleaned on the end of the run. 30 | We want to make sure we clean the temp tables even if elementary on run end hooks are disabled. 31 | 32 | IMPORTANT! - This must be running last because other hooks are relaying on the temp tables. 33 | #} 34 | {% if elementary.get_config_var("clean_elementary_temp_tables") %} 35 | {% do elementary.clean_elementary_temp_tables() %} 36 | {% endif %} 37 | {% endif %} 38 | {% endmacro %} 39 | -------------------------------------------------------------------------------- /macros/edr/system/hooks/on_run_start.sql: -------------------------------------------------------------------------------- 1 | {% macro on_run_start() %} 2 | {% set edr_cli_run = elementary.get_config_var('edr_cli_run') %} 3 | {% if not execute or edr_cli_run %} 4 | {{ return('') }} 5 | {% endif %} 6 | 7 | {% do elementary.recommend_dbt_core_artifacts_upgrade() %} 8 | {% do elementary.ensure_materialize_override() %} 9 | {% do elementary.init_elementary_graph() %} 10 | 11 | {% if elementary.is_test_command() %} 12 | {{ elementary.create_elementary_tests_schema() }} 13 | {% endif %} 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/clean_dbt_columns_temp_tables.sql: -------------------------------------------------------------------------------- 1 | {% macro clean_dbt_columns_temp_tables() %} 2 | {% do elementary.edr_log("Deleting dbt_columns temp tables") %} 3 | {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %} 4 | {% set dbt_columns_temp_tables_relations = dbt_utils.get_relations_by_prefix(schema=elementary_schema, prefix='dbt_columns__tmp_', database=elementary_database) %} 5 | {% for temp_relation in dbt_columns_temp_tables_relations %} 6 | {% do elementary.edr_log("Deleting temp table - " ~ 
temp_relation) %} 7 | {% do adapter.drop_relation(temp_relation) %} 8 | {% endfor %} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/clean_elementary_temp_tables.sql: -------------------------------------------------------------------------------- 1 | {% macro clean_elementary_temp_tables() %} 2 | {% do elementary.clean_elementary_test_tables() %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_elementary_package_version.sql: -------------------------------------------------------------------------------- 1 | {% macro get_elementary_package_version() %} 2 | {% set conf = elementary.get_runtime_config() %} 3 | {% do return(conf.dependencies["elementary"].version) %} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_first_env_var.sql: -------------------------------------------------------------------------------- 1 | {% macro get_first_env_var(var_names) %} 2 | {% for var_name in var_names %} 3 | {% set value = env_var(var_name, "") %} 4 | {% if value %} 5 | {{ return(value) }} 6 | {% endif %} 7 | {% endfor %} 8 | {{ return(none) }} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_run_started_at.sql: -------------------------------------------------------------------------------- 1 | {# This macro is for test purposes only! #} 2 | {# custom_run_started_at should be in ISO format. #} 3 | 4 | {% macro get_run_started_at() %} 5 | {% set custom_run_started_at = elementary.get_config_var('custom_run_started_at') %} 6 | {% if custom_run_started_at %} 7 | {# dbt run_started_at is fromtype datetime, so we convert the given custom time to be datetime as well. 
#} 8 | {{ return(modules.datetime.datetime.fromisoformat(custom_run_started_at)) }} 9 | {% else %} 10 | {{ return(run_started_at) }} 11 | {% endif %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_runtime_config.sql: -------------------------------------------------------------------------------- 1 | {% macro get_runtime_config() %} 2 | {{ return(builtins.ref.config) }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_test_argument.sql: -------------------------------------------------------------------------------- 1 | {% macro get_test_argument(argument_name, value, model_graph_node) %} 2 | {% if value %} 3 | {% do return(value) %} 4 | {%- endif %} 5 | {%- if model_graph_node %} 6 | {% set elementary_config = elementary.get_elementary_config_from_node(model_graph_node) %} 7 | {% if elementary_config and elementary_config is mapping %} 8 | {%- if argument_name in elementary_config %} 9 | {% do return(elementary_config.get(argument_name)) %} 10 | {%- endif %} 11 | {% endif %} 12 | {% endif %} 13 | {% set config_value = elementary.get_config_var(argument_name) %} 14 | {% if config_value is defined %} 15 | {% do return(config_value) %} 16 | {% endif %} 17 | {% do return(none) %} 18 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/system/system_utils/get_var.sql: -------------------------------------------------------------------------------- 1 | {% macro get_var(config_var_name, env_vars_names) %} 2 | {% do return(elementary.get_config_var(config_var_name) or elementary.get_first_env_var(env_vars_names)) %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/edr/system/system_utils/no_results_query.sql: 
-------------------------------------------------------------------------------- 1 | {% macro no_results_query() %} 2 | with nothing as (select 1 as num) 3 | select * from nothing where num = 2 4 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/system/system_utils/times.sql: -------------------------------------------------------------------------------- 1 | {% macro get_time_format() %} 2 | {% do return("%Y-%m-%d %H:%M:%S") %} 3 | {% endmacro %} 4 | 5 | {% macro run_started_at_as_string() %} 6 | {% do return(elementary.get_run_started_at().strftime(elementary.get_time_format())) %} 7 | {% endmacro %} 8 | 9 | {% macro datetime_now_utc_as_string() %} 10 | {% do return(modules.datetime.datetime.utcnow().strftime(elementary.get_time_format())) %} 11 | {% endmacro %} 12 | 13 | {% macro current_timestamp_column() %} 14 | cast ({{elementary.edr_current_timestamp_in_utc()}} as {{ elementary.edr_type_timestamp() }}) 15 | {% endmacro %} 16 | 17 | {% macro datetime_now_utc_as_timestamp_column() %} 18 | cast ('{{ elementary.datetime_now_utc_as_string() }}' as {{ elementary.edr_type_timestamp() }}) 19 | {% endmacro %} 20 | -------------------------------------------------------------------------------- /macros/edr/tests/on_run_end/union_columns_snapshot_query.sql: -------------------------------------------------------------------------------- 1 | {% macro union_columns_snapshot_query(temp_columns_snapshot_tables) %} 2 | {%- if temp_columns_snapshot_tables | length > 0 %} 3 | {%- set union_temp_query -%} 4 | with union_temp_columns_snapshot as ( 5 | {%- for temp_table in temp_columns_snapshot_tables -%} 6 | select * from {{ temp_table }} 7 | {%- if not loop.last %} union all {% endif %} 8 | {%- endfor %} 9 | ), 10 | columns_snapshot_with_duplicates as ( 11 | select *, 12 | row_number() over (partition by column_state_id order by detected_at desc) as row_number 13 | from union_temp_columns_snapshot 14 | ) 15 | select 
16 | column_state_id, 17 | full_column_name, 18 | full_table_name, 19 | column_name, 20 | data_type, 21 | is_new, 22 | detected_at 23 | from columns_snapshot_with_duplicates 24 | where row_number = 1 25 | {%- endset %} 26 | {{ return(union_temp_query) }} 27 | {%- endif %} 28 | {{ return(none) }} 29 | {% endmacro %} 30 | -------------------------------------------------------------------------------- /macros/edr/tests/on_run_end/union_metrics_query.sql: -------------------------------------------------------------------------------- 1 | {% macro union_metrics_query(temp_metrics_tables) %} 2 | {%- if temp_metrics_tables | length > 0 %} 3 | {%- set union_temp_query -%} 4 | with union_temps_metrics as ( 5 | {%- for temp_table in temp_metrics_tables -%} 6 | select * from {{ temp_table }} 7 | {%- if not loop.last %} union all {% endif %} 8 | {%- endfor %} 9 | ), 10 | metrics_with_duplicates as ( 11 | select *, 12 | row_number() over (partition by id order by updated_at desc) as row_number 13 | from union_temps_metrics 14 | ) 15 | select 16 | id, 17 | full_table_name, 18 | column_name, 19 | metric_name, 20 | metric_type, 21 | metric_value, 22 | source_value, 23 | bucket_start, 24 | bucket_end, 25 | bucket_duration_hours, 26 | updated_at, 27 | dimension, 28 | dimension_value, 29 | metric_properties 30 | from metrics_with_duplicates 31 | where row_number = 1 32 | {%- endset %} 33 | {{ return(union_temp_query) }} 34 | {%- endif %} 35 | {{ return(none) }} 36 | {% endmacro %} 37 | -------------------------------------------------------------------------------- /macros/edr/tests/on_run_start/create_elementary_tests_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro create_elementary_tests_schema() %} 2 | {% if execute and elementary.is_test_command() %} 3 | {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} 4 | {% set tests_schema_name = 
elementary.get_elementary_tests_schema(database_name, schema_name) %} 5 | {%- if tests_schema_name != schema_name and not adapter.check_schema_exists(database_name, tests_schema_name) %} 6 | {{ elementary.edr_log("Creating Elementary's tests schema.") }} 7 | {% set schema_relation = api.Relation.create(database=database_name, schema=tests_schema_name).without_identifier() %} 8 | {%- do dbt.create_schema(schema_relation) %} 9 | {% do adapter.commit() %} 10 | {%- endif %} 11 | {% endif %} 12 | {{ return('') }} 13 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/tests/on_run_start/ensure_materialize_override.sql: -------------------------------------------------------------------------------- 1 | {% macro ensure_materialize_override() %} 2 | {% if elementary.get_config_var("mute_ensure_materialization_override") %} 3 | {% do return(none) %} 4 | {% endif %} 5 | 6 | {% set runtime_config = elementary.get_runtime_config() %} 7 | {% if runtime_config.args.require_explicit_package_overrides_for_builtin_materializations is false %} 8 | {% do elementary.file_log("Materialization override is enabled.") %} 9 | {% do return(none) %} 10 | {% endif %} 11 | 12 | {% set major, minor, revision = dbt_version.split(".") %} 13 | {% set major = major | int %} 14 | {% set minor = minor | int %} 15 | {% if major > 1 or major == 1 and minor >= 8 %} 16 | {%- set msg %} 17 | IMPORTANT - Starting from dbt 1.8, users must explicitly allow packages to override materializations. 18 | Elementary requires this ability to support collection of samples and failed row count for dbt tests. 19 | Please add the following flag to dbt_project.yml to allow it: 20 | 21 | flags: 22 | require_explicit_package_overrides_for_builtin_materializations: false 23 | 24 | Notes - 25 | * This is a temporary measure that will result in a deprecation warning, please ignore it for now. Elementary is working with the dbt-core team on a more permanent solution. 
26 | * This message can be muted by setting the 'mute_ensure_materialization_override' var to true. 27 | {% endset %} 28 | {% do log(msg, info=true) %} 29 | {% endif %} 30 | {% endmacro %} 31 | -------------------------------------------------------------------------------- /macros/edr/tests/on_run_start/init_elementary_graph.sql: -------------------------------------------------------------------------------- 1 | {% macro init_elementary_graph() %} 2 | {% do graph.setdefault("elementary", { 3 | "elementary_test_results": {}, 4 | "elementary_test_failed_row_counts": {}, 5 | "tests_schema_name": none, 6 | "tables": { 7 | "metrics": { 8 | "relations": [], 9 | "rows": [] 10 | }, 11 | "schema_snapshots": [] 12 | }, 13 | "temp_test_table_relations_map": {}, 14 | "duration_context_stack": {}, 15 | }) %} 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /macros/edr/tests/on_run_start/recommend_dbt_core_artifacts_upgrade.sql: -------------------------------------------------------------------------------- 1 | {% macro recommend_dbt_core_artifacts_upgrade() %} 2 | {% if elementary.get_config_var("mute_dbt_upgrade_recommendation") %} 3 | {% do return(none) %} 4 | {% endif %} 5 | 6 | {% set major, minor, revision = dbt_version.split(".") %} 7 | {% set major = major | int %} 8 | {% set minor = minor | int %} 9 | {% if major < 1 or major == 1 and minor < 4 %} 10 | {%- set msg %} 11 | You are using dbt version {{ dbt_version }}. 12 | Elementary introduced major performance improvements for dbt version 1.4.0 or later. 13 | More information on the performance impact can be found here: https://docs.elementary-data.com/dbt/on-run-end_hooks#performance-impact-of-on-run-end-hooks 14 | This message can be muted by setting the 'mute_dbt_upgrade_recommendation' var to true. 
15 | {% endset %} 16 | {% do log(msg, info=true) %} 17 | {% endif %} 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /macros/edr/tests/test_configuration/get_anomaly_direction.sql: -------------------------------------------------------------------------------- 1 | {% macro get_anomaly_direction(anomaly_direction, model_graph_node) %} 2 | {%- set anomaly_direction = elementary.get_test_argument('anomaly_direction', anomaly_direction, model_graph_node) | lower %} 3 | {%- do elementary.validate_anomaly_direction(anomaly_direction) -%} 4 | {{ return(anomaly_direction) }} 5 | {% endmacro %} 6 | 7 | {% macro validate_anomaly_direction(anomaly_direction) %} 8 | {% if anomaly_direction %} 9 | {% set direction_case_insensitive = anomaly_direction %} 10 | {% if direction_case_insensitive not in ['drop','spike','both'] %} 11 | {% do exceptions.raise_compiler_error('Supported anomaly directions are: both, drop, spike. received anomaly_direction: {}'.format(anomaly_direction)) %} 12 | {% endif %} 13 | {% else %} 14 | {% do exceptions.raise_compiler_error('anomaly_direction can\'t be empty. 
Supported anomaly directions are: both, drop, spike') %} 15 | {% endif %} 16 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/tests/test_configuration/get_days_back.sql: -------------------------------------------------------------------------------- 1 | {% macro get_days_back(days_back, model_graph_node, seasonality=none) %} 2 | {% set days_back = elementary.get_test_argument('days_back', days_back, model_graph_node) %} 3 | {% if seasonality in ["day_of_week", "hour_of_week"] %} 4 | {% do return(days_back * 7) %} 5 | {% endif %} 6 | {% do return(days_back) %} 7 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/tests/test_configuration/get_exclude_final_results.sql: -------------------------------------------------------------------------------- 1 | {% macro get_exclude_final_results(exclude_final_results_arg) %} 2 | {% if not exclude_final_results_arg %} 3 | {{ return("1 = 1") }} 4 | {% endif %} 5 | 6 | {{ return(exclude_final_results_arg) }} 7 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/tests/test_configuration/get_model_baseline_columns.sql: -------------------------------------------------------------------------------- 1 | {% macro get_model_baseline_columns(model, enforce_types=False) %} 2 | {# Get baseline columns #} 3 | {% set model_relation = dbt.load_relation(model) %} 4 | {% set model_graph_node = elementary.get_model_graph_node(model_relation) %} 5 | 6 | {% set baseline = [] %} 7 | {% set columns_without_types = [] %} 8 | {% for column in model_graph_node["columns"].values() %} 9 | {% if "data_type" in column %} 10 | {% set info_schema_data_type = elementary.get_normalized_data_type(column["data_type"]) %} 11 | {% else %} 12 | {% set info_schema_data_type = none %} 13 | {% endif %} 14 | {% set column_info = {"column_name": column["name"], "data_type": 
info_schema_data_type } %} 15 | {% if column_info["data_type"] is none %} 16 | {% do columns_without_types.append(column_info["column_name"]) %} 17 | {% endif %} 18 | {% do baseline.append(column_info) %} 19 | {% endfor %} 20 | 21 | {% if columns_without_types %} 22 | {% if enforce_types %} 23 | {% do exceptions.raise_compiler_error("Data type not defined for columns `{}` on model `{}` for schema change from baseline test".format(columns_without_types, model)) %} 24 | {% else %} 25 | {% do elementary.edr_log_warning("missing data types for columns: " ~ columns_without_types) %} 26 | {% endif %} 27 | {% endif %} 28 | 29 | {% do return(baseline) %} 30 | {% endmacro %} 31 | -------------------------------------------------------------------------------- /macros/edr/tests/test_configuration/get_seasonality.sql: -------------------------------------------------------------------------------- 1 | {% macro get_seasonality(seasonality, model_graph_node, time_bucket, timestamp_column) %} 2 | {%- set seasonality = elementary.get_test_argument('seasonality', seasonality, model_graph_node) %} 3 | {%- do elementary.validate_seasonality(seasonality, time_bucket, timestamp_column) -%} 4 | {%- if seasonality %} 5 | {{ return(seasonality) }} 6 | {%- endif %} 7 | {{ return(none)}} 8 | {% endmacro %} 9 | 10 | {% macro validate_seasonality(seasonality, time_bucket, timestamp_column) %} 11 | {% if seasonality %} 12 | {% if not timestamp_column %} 13 | {% do exceptions.raise_compiler_error('Test with seasonality must have a timestamp_column, but none was provided') %} 14 | {% endif %} 15 | {% set supported_seasonality_values = ['day_of_week', 'hour_of_day', 'hour_of_week'] %} 16 | {%- set seasonality = seasonality | lower %} 17 | {% if seasonality not in supported_seasonality_values %} 18 | {% do exceptions.raise_compiler_error('Seasonality value should be one of' ~ supported_seasonality_values ~ ', got ' ~ seasonality ~ ' instead') %} 19 | {% endif %} 20 | {% if seasonality == 
'day_of_week' and ((time_bucket.count != 1) or (time_bucket.period != 'day')) %} 21 | {% do exceptions.raise_compiler_error('Daily seasonality is supported only with time_bucket 1 day, got period: ' ~ time_bucket.period ~ ' and count: ' ~ time_bucket.count ~ ' instead') %} 22 | {% elif seasonality in ['hour_of_day', 'hour_of_week'] and ((time_bucket.count != 1) or (time_bucket.period != 'hour')) %} 23 | {% do exceptions.raise_compiler_error('Hourly seasonality is supported only with time_bucket 1 hour, got period: ' ~ time_bucket.period ~ ' and count: ' ~ time_bucket.count ~ ' instead') %} 24 | {% endif %} 25 | {% endif %} 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /macros/edr/tests/test_freshness_anomalies.sql: -------------------------------------------------------------------------------- 1 | {% test freshness_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, detection_delay, anomaly_exclude_metrics, detection_period, training_period) %} 2 | {{ config(tags = ['elementary-tests']) }} 3 | {{ elementary.test_table_anomalies( 4 | model=model, 5 | table_anomalies=["freshness"], 6 | freshness_column=none, 7 | timestamp_column=timestamp_column, 8 | where_expression=where_expression, 9 | anomaly_sensitivity=anomaly_sensitivity, 10 | min_training_set_size=min_training_set_size, 11 | time_bucket=time_bucket, 12 | days_back=days_back, 13 | backfill_days=backfill_days, 14 | mandatory_params=['timestamp_column'], 15 | seasonality=seasonality, 16 | sensitivity=sensitivity, 17 | ignore_small_changes=ignore_small_changes, 18 | detection_delay=detection_delay, 19 | anomaly_exclude_metrics=anomaly_exclude_metrics, 20 | detection_period=detection_period, 21 | training_period=training_period 22 | ) 23 | }} 24 | {% endtest %} 25 | 
-------------------------------------------------------------------------------- /macros/edr/tests/test_unstructured_data_validation.sql: -------------------------------------------------------------------------------- 1 | {% test unstructured_data_validation(model, column_name, expectation_prompt, llm_model_name=none) %} 2 | {{ config(tags = ['elementary-tests']) }} 3 | {% set prompt_context = "You are a data validator specializing in validating unstructured data." %} 4 | {{ return(elementary.test_ai_data_validation(model, column_name, expectation_prompt, llm_model_name, prompt_context)) }} 5 | {% endtest %} -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/clean_up_tables.sql: -------------------------------------------------------------------------------- 1 | {% macro clean_up_tables(relations) %} 2 | {{ return(adapter.dispatch('clean_up_tables', 'elementary')(relations)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__clean_up_tables(relations) %} 6 | {# Default implementation does nothing #} 7 | {% endmacro %} 8 | 9 | {% macro athena__clean_up_tables(relations) %} 10 | {% for relation in relations %} 11 | {% do adapter.clean_up_table(relation) %} 12 | {% endfor %} 13 | {% endmacro %} -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/create_elementary_test_table.sql: -------------------------------------------------------------------------------- 1 | {% macro create_elementary_test_table(database_name, schema_name, test_name, table_type, sql_query) %} 2 | {% if execute %} 3 | {% set temp_table_name = elementary.table_name_with_suffix(test_name, "__" ~ table_type ~ elementary.get_timestamped_table_suffix()).replace("*", "") %} 4 | 5 | {% set default_identifier_quoting = api.Relation.get_default_quote_policy().get_part("identifier") %} 6 | {% if not adapter.config.quoting.get("identifier", default_identifier_quoting) %} 7 | {% set 
temp_table_name = adapter.quote(temp_table_name) %} 8 | {% endif %} 9 | 10 | {{ elementary.debug_log(table_type ~ ' table: ' ~ database_name ~ '.' ~ schema_name ~ '.' ~ temp_table_name) }} 11 | 12 | {% set _, temp_table_relation = dbt.get_or_create_relation(database=database_name, 13 | schema=schema_name, 14 | identifier=temp_table_name, 15 | type='table') -%} 16 | 17 | {# Create the table if it doesnt exist #} 18 | {%- do elementary.create_or_replace(false, temp_table_relation, sql_query) %} 19 | 20 | {# Cache the test table for easy access later #} 21 | {% set test_entry = elementary.get_cache("temp_test_table_relations_map").setdefault(test_name, {}) %} 22 | {% do test_entry.update({table_type: temp_table_relation}) %} 23 | {{ return(temp_table_relation) }} 24 | {% endif %} 25 | {{ return(none) }} 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/create_model_baseline_table.sql: -------------------------------------------------------------------------------- 1 | {% macro create_model_baseline_table(baseline_columns, database_name, schema_name, test_name) %} 2 | {% set empty_table_query = elementary.empty_table([('column_name','string'),('data_type','string')]) %} 3 | {% set baseline_table_relation = elementary.create_elementary_test_table(database_name, schema_name, 4 | test_name | lower, 'schema_baseline', 5 | empty_table_query) %} 6 | {% do elementary.insert_rows(baseline_table_relation, baseline_columns, should_commit=True, chunk_size=elementary.get_config_var('dbt_artifacts_chunk_size')) %} 7 | {% do return(baseline_table_relation) %} 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql: -------------------------------------------------------------------------------- 1 | {% macro find_normalized_data_type_for_column(model_relation, column_name) %} 2 | 3 | 
{% set columns_from_relation = adapter.get_columns_in_relation(model_relation) %} 4 | {% if column_name and columns_from_relation and columns_from_relation is iterable %} 5 | {% for column_obj in columns_from_relation %} 6 | {% if column_obj.column | lower == column_name | trim('\'\"\`') | lower %} 7 | {{ return(elementary.normalize_data_type(elementary.get_column_data_type(column_obj))) }} 8 | {% endif %} 9 | {% endfor %} 10 | {% do exceptions.raise_compiler_error("Column `{}` was not found in `{}`.".format(column_name, model_relation.name)) %} 11 | {% endif %} 12 | {{ return(none) }} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/get_elementary_test_table.sql: -------------------------------------------------------------------------------- 1 | {% macro get_elementary_test_table(test_name, table_type) %} 2 | {% if execute %} 3 | {% set test_entry = elementary.get_cache("temp_test_table_relations_map").setdefault(test_name, {}) %} 4 | {% do return(test_entry.get(table_type)) %} 5 | {% endif %} 6 | {% do return(none) %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/get_elementary_test_table_name.sql: -------------------------------------------------------------------------------- 1 | {% macro get_elementary_test_table_name() %} 2 | {% set test_node = model %} 3 | {% set test_hash = test_node.unique_id.split(".")[-1] %} 4 | {% set test_name = test_node.name %} 5 | {% do return("test_{}_{}".format(test_hash, test_name)) %} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/get_elementary_tests_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro get_elementary_tests_schema(elementary_database, elementary_schema) %} 2 | {% set LEGACY_TESTS_SCHEMA_SUFFIX = 
"__tests" %} 3 | 4 | {% set cached_tests_schema_name = elementary.get_cache("tests_schema_name") %} 5 | {% if cached_tests_schema_name is not none %} 6 | {{ return(cached_tests_schema_name) }} 7 | {% endif %} 8 | 9 | {% set tests_schema_suffix = elementary.get_config_var('tests_schema_name') %} 10 | {% set tests_schema_name = elementary_schema ~ tests_schema_suffix %} 11 | 12 | {# Backward compatibility - if a tests schema suffix is not defined, but the legacy tests schema exists in the DB, 13 | then use it #} 14 | {% if not tests_schema_suffix %} 15 | {% set legacy_tests_schema_name = elementary_schema ~ LEGACY_TESTS_SCHEMA_SUFFIX %} 16 | {% if adapter.check_schema_exists(elementary_database, legacy_tests_schema_name) %} 17 | {% set tests_schema_name = legacy_tests_schema_name %} 18 | {% endif %} 19 | {% endif %} 20 | 21 | {% do elementary.set_cache("tests_schema_name", tests_schema_name) %} 22 | 23 | {{ return(tests_schema_name) }} 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/get_model_graph_node.sql: -------------------------------------------------------------------------------- 1 | {% macro get_model_graph_node(model_relation) %} 2 | {% if execute %} 3 | {# model here is actually the test node in the graph #} 4 | {% set test_graph_node = model %} 5 | {% set test_depends_on_unique_ids = test_graph_node.depends_on.nodes %} 6 | {# model relation is the relation object of the model where the test is defined #} 7 | {% set relation_name = model_relation.name | lower %} 8 | {% set depends_on_nodes = elementary.get_nodes_by_unique_ids(test_depends_on_unique_ids) %} 9 | {% if depends_on_nodes %} 10 | {% for node in depends_on_nodes %} 11 | {% set node_name = node.name | lower %} 12 | {% set node_alias = node.get('alias', '') | lower %} 13 | {% set node_identifier = node.get('identifier', '') | lower %} 14 | {% if node_name == relation_name or node_alias == relation_name or 
node_identifier == relation_name %}
                    {{ return(node) }}
                {% endif %}
            {% endfor %}
        {% endif %}
    {% endif %}
    {{ return(none) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/edr/tests/test_utils/get_test_execution_id.sql:
--------------------------------------------------------------------------------
{# Execution id of the currently-running test node (`model` is the test node here). #}
{% macro get_test_execution_id() %}
    {{ return(elementary.get_node_execution_id(model)) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/edr/tests/test_utils/get_test_type.sql:
--------------------------------------------------------------------------------
{# Classify a flattened test node: "anomaly_detection" / "schema_change" for
   elementary-namespaced tests, "dbt_test" for everything else. #}
{% macro get_test_type(flattened_test) %}
    {% if flattened_test.test_namespace == "elementary" %}
        {%- set elementary_test_type = elementary.get_elementary_test_type(flattened_test) %}
    {% endif %}
    {% do return(elementary_test_type or "dbt_test") %}
{% endmacro %}

{% macro get_elementary_test_type(flattened_test) %}
    {%- set anomaly_detection_tests = [
        'volume_anomalies',
        'freshness_anomalies',
        'event_freshness_anomalies',
        'table_anomalies',
        'dimension_anomalies',
        'column_anomalies',
        'all_columns_anomalies'
    ] %}
    {%- set schema_changes_tests = [
        'schema_changes',
        'schema_changes_from_baseline',
        'json_schema',
    ] %}

    {# Match on the short (un-namespaced) test name, case-insensitively. #}
    {% if flattened_test.short_name | lower in anomaly_detection_tests %}
        {% do return("anomaly_detection") %}
    {% elif flattened_test.short_name | lower in schema_changes_tests %}
        {% do return("schema_change") %}
    {% endif %}
    {% do return(none) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/edr/tests/test_utils/get_test_unique_id.sql:
--------------------------------------------------------------------------------
{% macro get_test_unique_id() %}
{% set test_unique_id = model.get('unique_id') %} 3 | {{ return(test_unique_id) }} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/run_python.sql: -------------------------------------------------------------------------------- 1 | {% macro run_python(graph_node, code) %} 2 | {% set macro_stack = context_macro_stack.call_stack %} 3 | {% set macro_stack_copy = macro_stack.copy() %} 4 | {% do macro_stack.clear() %} 5 | {% do macro_stack.extend([["materialization"], "macro.dbt.statement"]) %} 6 | {% do submit_python_job(graph_node, code) %} 7 | {% do macro_stack.clear() %} 8 | {% do macro_stack.extend(macro_stack_copy) %} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/edr/tests/test_utils/validate_unique_metric_names.sql: -------------------------------------------------------------------------------- 1 | {% macro validate_unique_metric_names(metrics) %} 2 | {% set metric_names = [] %} 3 | {% for metric in metrics %} 4 | {% if not metric.name %} 5 | {% do exceptions.raise_compiler_error("The 'name' argument is required for each metric.") %} 6 | {% endif %} 7 | {% if metric.name in metric_names %} 8 | {% do exceptions.raise_compiler_error("The metric '{}' is already defined.".format(metric.name)) %} 9 | {% endif %} 10 | {% do metric_names.append(metric.name) %} 11 | {% endfor %} 12 | 13 | {% set test_node = context["model"] %} 14 | {% set parent_model_unique_ids = elementary.get_parent_model_unique_ids_from_test_node(test_node) %} 15 | 16 | {% for graph_node in graph.nodes.values() %} 17 | {% if test_node.unique_id != graph_node.unique_id and graph_node.resource_type == "test" %} 18 | {% set test_metadata = elementary.safe_get_with_default(graph_node, 'test_metadata', {}) %} 19 | {% if test_metadata.namespace == "elementary" and test_metadata.name == "collect_metrics" %} 20 | {% set test_parent_model_unique_ids = 
elementary.get_parent_model_unique_ids_from_test_node(graph_node) %} 21 | {% if parent_model_unique_ids == test_parent_model_unique_ids %} 22 | {% for metric in test_metadata.kwargs.metrics %} 23 | {% if metric.name in metric_names %} 24 | {% do exceptions.raise_compiler_error("The metric '{}' is already defined.".format(metric.name)) %} 25 | {% endif %} 26 | {% endfor %} 27 | {% endif %} 28 | {% endif %} 29 | {% endif %} 30 | {% endfor %} 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /macros/edr/tests/test_volume_anomalies.sql: -------------------------------------------------------------------------------- 1 | {% test volume_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period) %} 2 | {{ config(tags = ['elementary-tests']) }} 3 | 4 | {{ elementary.test_table_anomalies( 5 | model=model, 6 | table_anomalies=["row_count"], 7 | freshness_column=none, 8 | timestamp_column=timestamp_column, 9 | where_expression=where_expression, 10 | anomaly_sensitivity=anomaly_sensitivity, 11 | anomaly_direction=anomaly_direction, 12 | min_training_set_size=min_training_set_size, 13 | time_bucket=time_bucket, 14 | days_back=days_back, 15 | backfill_days=backfill_days, 16 | seasonality=seasonality, 17 | sensitivity=sensitivity, 18 | ignore_small_changes=ignore_small_changes, 19 | fail_on_zero=fail_on_zero, 20 | detection_delay=detection_delay, 21 | anomaly_exclude_metrics=anomaly_exclude_metrics, 22 | detection_period=detection_period, 23 | training_period=training_period 24 | ) 25 | }} 26 | {% endtest %} 27 | -------------------------------------------------------------------------------- /macros/materializations/non_dbt.sql: 
--------------------------------------------------------------------------------
{# Materialization that does not create any table at the end of its run. #}
{# Example use case: a model should appear on the Elementary lineage graph,
   but its table is created outside of dbt. #}
{% materialization non_dbt, default -%}
    {# The main statement executes the model's SQL, but creates no table / view on the DWH. #}
    {% call statement('main') -%}
        {{ sql }}
    {%- endcall %}
    {{ adapter.commit() }}
    {{ return({'relations': []}) }}
{% endmaterialization %}
--------------------------------------------------------------------------------
/macros/utils/command_type_utils.sql:
--------------------------------------------------------------------------------
{# Predicates over flags.WHICH - the dbt sub-command currently being executed. #}

{% macro is_test_command() %}
    {{ return(flags.WHICH in ['test', 'build', 'retry']) }}
{% endmacro %}

{% macro is_run_command() %}
    {{ return(flags.WHICH in ['run', 'build', 'retry']) }}
{% endmacro %}

{% macro is_docs_command() %}
    {{ return(flags.WHICH in ['generate', 'serve']) }}
{% endmacro %}

{% macro is_freshness_command() %}
    {{ return(flags.WHICH in ['freshness']) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/can_query_relation.sql:
--------------------------------------------------------------------------------
{# Whether the given relation can be queried; dispatched per adapter. #}
{% macro can_query_relation(relation) %}
    {% do return(adapter.dispatch("can_query_relation", "elementary")(relation)) %}
{% endmacro %}

{# BigQuery: probe the relation inside a scripting begin/exception block so a
   failing select yields 0 instead of raising. #}
{% macro bigquery__can_query_relation(relation) %}
    {% set probe_query %}
        begin
        select 1
        from {{ relation }}
        limit 1;
        exception when error then
        select 0;
        end
    {% endset %}
    {% set probe_result = elementary.result_value(probe_query) %}
    {% do return(probe_result == 1) %}
{% endmacro %}

{% macro
default__can_query_relation(relation) %} 20 | {% do exceptions.raise_compiler_error("'can_query_relation' not implemented on '{}'.".format(target.type)) %} 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/concat.sql: -------------------------------------------------------------------------------- 1 | {%- macro edr_concat(val1, val2) -%} 2 | concat({{ elementary.edr_cast_as_string(val1) }}, {{ elementary.edr_cast_as_string(val2) }}) 3 | {%- endmacro -%} -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/contains.sql: -------------------------------------------------------------------------------- 1 | {% macro contains(string, string_to_search,case_sensitive=False) -%} 2 | {{ adapter.dispatch('contains', 'elementary') (string, string_to_search, case_sensitive) }} 3 | {%- endmacro %} 4 | 5 | {# Snowflake, Databricks #} 6 | {% macro default__contains(string, string_to_search, case_sensitive) %} 7 | {%- if case_sensitive %} 8 | contains({{ string }}, '{{ string_to_search }}') 9 | {%- else %} 10 | contains(lower({{ string }}), lower('{{ string_to_search }}')) 11 | {%- endif %} 12 | {% endmacro %} 13 | 14 | {% macro bigquery__contains(string, string_to_search, case_sensitive) %} 15 | {%- if case_sensitive %} 16 | contains_substr({{ string }}, '{{ string_to_search }}') 17 | {%- else %} 18 | {%- set string_to_search = string_to_search | lower %} 19 | contains_substr(lower({{ string }}), '{{ string_to_search }}') 20 | {%- endif %} 21 | {% endmacro %} 22 | 23 | {% macro postgres__contains(string, string_to_search, case_sensitive) %} 24 | {%- if case_sensitive %} 25 | case when 26 | {{ string }} like '%{{ string_to_search }}%' then true 27 | else false end 28 | {%- else %} 29 | case when 30 | lower({{ string }}) like lower('%{{ string_to_search }}%') then true 31 | else false end 32 | {%- endif %} 33 | {% endmacro %} 34 | 35 | {% 
macro athena__contains(string, string_to_search, case_sensitive) %}
    {%- if case_sensitive %}
    case when
        {{ string }} like '%{{ string_to_search }}%' then true
    else false end
    {%- else %}
    case when
        lower({{ string }}) like lower('%{{ string_to_search }}%') then true
    else false end
    {%- endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/date_trunc.sql:
--------------------------------------------------------------------------------
{# Cross-db date_trunc expression; dispatched per adapter. #}
{% macro edr_date_trunc(date_part, date_expression) -%}
    {{ return(adapter.dispatch('edr_date_trunc', 'elementary') (date_part, date_expression)) }}
{%- endmacro %}

{% macro default__edr_date_trunc(datepart, date_expression) %}
    {# Prefer the dbt built-in; fall back to dbt_utils for older dbt versions. #}
    {% set macro = dbt.date_trunc or dbt_utils.date_trunc %}
    {% if not macro %}
        {{ exceptions.raise_compiler_error("Did not find a `date_trunc` macro.") }}
    {% endif %}
    {{ return(macro(datepart, date_expression)) }}
{% endmacro %}

{# Bigquery date_trunc does not support timestamp expressions and date parts smaller than day #}
{% macro bigquery__edr_date_trunc(date_part, date_expression) %}
    timestamp_trunc(cast({{ date_expression }} as timestamp), {{ date_part }})
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/dateadd.sql:
--------------------------------------------------------------------------------
{# Cross-db dateadd: prefer the dbt built-in, fall back to dbt_utils. #}
{% macro edr_dateadd(datepart, interval, from_date_or_timestamp) %}
    {% set macro = dbt.dateadd or dbt_utils.dateadd %}
    {% if not macro %}
        {{ exceptions.raise_compiler_error("Did not find a `dateadd` macro.") }}
    {% endif %}
    {{ return(macro(datepart, interval, from_date_or_timestamp)) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/day_of_week.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_day_of_week_expression(date_expr) %} 2 | {{ return(adapter.dispatch('edr_day_of_week_expression','elementary')(elementary.edr_cast_as_date(date_expr))) }} 3 | {% endmacro %} 4 | 5 | {# Databricks, Spark: #} 6 | {% macro default__edr_day_of_week_expression(date_expr) %} 7 | DATE_FORMAT({{ date_expr }}, 'EEEE') 8 | {% endmacro %} 9 | 10 | {% macro bigquery__edr_day_of_week_expression(date_expr) %} 11 | FORMAT_DATE('%A', {{ date_expr }}) 12 | {% endmacro %} 13 | 14 | {% macro postgres__edr_day_of_week_expression(date_expr) %} 15 | to_char({{ date_expr }}, 'Day') 16 | {% endmacro %} 17 | 18 | {% macro redshift__edr_day_of_week_expression(date_expr) %} 19 | {# Redshift returns the days padded with whitespaces to width of 9 #} 20 | trim(' ' FROM to_char({{ date_expr }}, 'Day')) 21 | {% endmacro %} 22 | 23 | {% macro snowflake__edr_day_of_week_expression(date_expr) %} 24 | {# copied from Snowflake help docs: https://docs.snowflake.com/en/user-guide/date-time-examples #} 25 | DECODE (EXTRACT('dayofweek',{{ date_expr }}), 26 | 1 , 'Monday', 27 | 2 , 'Tuesday', 28 | 3 , 'Wednesday', 29 | 4 , 'Thursday', 30 | 5 , 'Friday', 31 | 6 , 'Saturday', 32 | 0 , 'Sunday' 33 | ) 34 | {% endmacro %} 35 | 36 | {% macro athena__edr_day_of_week_expression(date_expr) %} 37 | DATE_FORMAT({{ date_expr }}, '%W') 38 | {% endmacro %} 39 | 40 | {% macro trino__edr_day_of_week_expression(date_expr) %} 41 | date_format({{ date_expr }}, '%W') 42 | {% endmacro %} 43 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/hour_of_day.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_hour_of_day_expression(date_expr) %} 2 | {{ return(adapter.dispatch('edr_hour_of_day_expression','elementary')(elementary.edr_cast_as_timestamp(date_expr))) }} 3 | {% 
endmacro %}

{# Databricks, Spark, Athena, Trino: #}
{% macro default__edr_hour_of_day_expression(date_expr) %}
    HOUR({{ date_expr }})
{% endmacro %}

{% macro bigquery__edr_hour_of_day_expression(date_expr) %}
    EXTRACT(hour from {{ date_expr }})
{% endmacro %}

{% macro postgres__edr_hour_of_day_expression(date_expr) %}
    EXTRACT(hour from {{ date_expr }})
{% endmacro %}

{% macro redshift__edr_hour_of_day_expression(date_expr) %}
    EXTRACT(hour from {{ date_expr }})
{% endmacro %}

{% macro snowflake__edr_hour_of_day_expression(date_expr) %}
    HOUR({{ date_expr }})
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/incremental_strategy.sql:
--------------------------------------------------------------------------------
{# Default incremental strategy per adapter: "merge" on Athena / Trino,
   adapter default (none) elsewhere. #}
{% macro get_default_incremental_strategy() %}
    {% do return(adapter.dispatch("get_default_incremental_strategy", "elementary")()) %}
{% endmacro %}

{%- macro athena__get_default_incremental_strategy() %}
    {% do return("merge") %}
{% endmacro %}

{%- macro trino__get_default_incremental_strategy() %}
    {% do return("merge") %}
{% endmacro %}

{% macro default__get_default_incremental_strategy() %}
    {% do return(none) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/lag.sql:
--------------------------------------------------------------------------------
{# Cross-db LAG window expression; ClickHouse uses lagInFrame instead of lag. #}
{% macro lag(column, offset=1) %}
    {{ return(adapter.dispatch('lag', 'elementary')(column, offset)) }}
{% endmacro %}

{% macro default__lag(column, offset=1) %}
    lag({{ column }}, {{ offset }})
{% endmacro %}

{% macro clickhouse__lag(column, offset=1) %}
    lagInFrame({{ column }}, {{ offset }})
{% endmacro %}
-------------------------------------------------------------------------------- /macros/utils/cross_db_utils/multi_value_in.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_multi_value_in(source_cols, target_cols, target_table) %} 2 | {% do return(adapter.dispatch('edr_multi_value_in', 'elementary') (source_cols, target_cols, target_table)) %} 3 | {% endmacro %} 4 | 5 | {%- macro default__edr_multi_value_in(source_cols, target_cols, target_table) -%} 6 | ( 7 | {%- for val in source_cols -%} 8 | {{ val }} 9 | {%- if not loop.last %}, {% endif %} 10 | {%- endfor %} 11 | ) in ( 12 | select {% for val in target_cols -%} 13 | {{ val }} 14 | {%- if not loop.last %}, {% endif %} 15 | {%- endfor %} 16 | from {{ target_table }} 17 | ) 18 | {%- endmacro -%} 19 | 20 | {%- macro bigquery__edr_multi_value_in(source_cols, target_cols, target_table) -%} 21 | -- BigQuery doesn't support multi-value IN, so we emulate it with CONCAT 22 | concat( 23 | {%- for val in source_cols -%} 24 | {{ elementary.edr_cast_as_string(val) -}} 25 | {%- if not loop.last %}, {% endif %} 26 | {%- endfor %} 27 | ) in ( 28 | select concat({%- for val in target_cols -%} 29 | {{ elementary.edr_cast_as_string(val) -}} 30 | {%- if not loop.last %}, {% endif %} 31 | {%- endfor %}) 32 | from {{ target_table }} 33 | ) 34 | {%- endmacro -%} 35 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/quote_column.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro edr_quote(str) %} 3 | {% set escaped_str = elementary.escape_special_chars(str) %} 4 | {% do return("'{}'".format(escaped_str)) %} 5 | {% endmacro %} 6 | 7 | {% macro dict_to_quoted_json(d) %} 8 | {% do return(elementary.edr_cast_as_string(elementary.edr_quote(tojson(d, sort_keys=true)))) %} 9 | {% endmacro %} 10 | 11 | {%- macro edr_quote_column(column_name) -%} 12 | {% if 
adapter.quote(column_name[1:-1]) == column_name %} 13 | {{ return(column_name) }} 14 | {% else %} 15 | {% set quoted_column = adapter.quote(column_name) %} 16 | {{ return(quoted_column) }} 17 | {% endif %} 18 | {%- endmacro -%} 19 | 20 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/safe_cast.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_safe_cast(field, type) %} 2 | {{ return(adapter.dispatch('edr_safe_cast', 'elementary') (field, type)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__edr_safe_cast(field, type) %} 6 | {% set macro = dbt.safe_cast or dbt_utils.safe_cast %} 7 | {% if not macro %} 8 | {{ exceptions.raise_compiler_error("Did not find a `safe_cast` macro.") }} 9 | {% endif %} 10 | {{ return(macro(field, type)) }} 11 | {% endmacro %} 12 | 13 | {% macro spark__edr_safe_cast(field, type) %} 14 | try_cast({{field}} as {{type}}) 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/schema_exists.sql: -------------------------------------------------------------------------------- 1 | {% macro schema_exists(database, schema) %} 2 | {% do return(adapter.dispatch("schema_exists", "elementary")(database, schema)) %} 3 | {% endmacro %} 4 | 5 | {% macro snowflake__schema_exists(database, schema) %} 6 | {% set database_sql %} 7 | show databases like '{{ database }}' 8 | {% endset %} 9 | {% set database_exists = elementary.run_query(database_sql)|length > 0 %} 10 | {% if not database_exists %} 11 | {% do return(false) %} 12 | {% endif %} 13 | {% do return(adapter.check_schema_exists(database, schema)) %} 14 | {% endmacro %} 15 | 16 | {% macro postgres__schema_exists(database, schema) %} 17 | {% if database != target.database %} 18 | {# Cross db operations not supported in postgres #} 19 | {% do return(false) %} 20 | {% endif %} 21 | {% do 
return(adapter.check_schema_exists(database, schema)) %}
{% endmacro %}

{% macro bigquery__schema_exists(database, schema) %}
    {% if database != target.project %}
        {# Cannot check for non-existing database in bigquery through sql (only api), assume it exists #}
        {% do return(true) %}
    {% endif %}
    {% do return(adapter.check_schema_exists(database, schema)) %}
{% endmacro %}

{% macro default__schema_exists(database, schema) %}
    {% do return(adapter.check_schema_exists(database, schema)) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/sql_union_distinct.sql:
--------------------------------------------------------------------------------
{# BigQuery requires an explicit `distinct` qualifier on union; other
   warehouses treat bare `union` as distinct already. #}
{% macro sql_union_distinct() %}
    union {% if target.type == "bigquery" %} distinct {% endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/table_type.sql:
--------------------------------------------------------------------------------
{# Default table type per adapter: "iceberg" on Athena, adapter default elsewhere. #}
{% macro get_default_table_type() %}
    {% do return(adapter.dispatch("get_default_table_type", "elementary")()) %}
{% endmacro %}

{%- macro athena__get_default_table_type() %}
    {% do return("iceberg") %}
{% endmacro %}

{% macro default__get_default_table_type() %}
    {% do return(none) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/cross_db_utils/target_database.sql:
--------------------------------------------------------------------------------
{# Name of the target database, per adapter (dbname / project / catalog / schema). #}
{% macro target_database() -%}
    {{ return(adapter.dispatch('target_database', 'elementary')()) }}
{%- endmacro %}

-- Postgres and Redshift
{% macro default__target_database() %}
    {% do return(target.dbname) %}
{% endmacro %}

{% macro spark__target_database() %}
    {% do
return(target.catalog or none) %} 12 | {% endmacro %} 13 | 14 | {% macro snowflake__target_database() %} 15 | {% do return(target.database) %} 16 | {% endmacro %} 17 | 18 | {% macro bigquery__target_database() %} 19 | {% do return(target.project) %} 20 | {% endmacro %} 21 | 22 | {% macro athena__target_database() %} 23 | {% do return(target.database) %} 24 | {% endmacro %} 25 | 26 | {% macro trino__target_database() %} 27 | {% do return(target.database) %} 28 | {% endmacro %} 29 | 30 | {% macro clickhouse__target_database() %} 31 | {% do return(target.schema) %} 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/time_trunc.sql: -------------------------------------------------------------------------------- 1 | {# Same as date trunc, but casts the time/date expression to timestamp #} 2 | {% macro edr_time_trunc(date_part, date_expression) -%} 3 | {{ return(adapter.dispatch('edr_time_trunc', 'elementary') (date_part, date_expression)) }} 4 | {%- endmacro %} 5 | 6 | {% macro default__edr_time_trunc(date_part, date_expression) %} 7 | date_trunc('{{date_part}}', cast({{ date_expression }} as {{ elementary.edr_type_timestamp() }})) 8 | {% endmacro %} 9 | 10 | {% macro bigquery__edr_time_trunc(date_part, date_expression) %} 11 | timestamp_trunc(cast({{ date_expression }} as timestamp), {{ date_part }}) 12 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/timediff.sql: -------------------------------------------------------------------------------- 1 | {# Same as datediff, but supports timestamps as well and not just dates #} 2 | {% macro timediff(timepart, first_timestamp, second_timestamp) -%} 3 | {{ return(adapter.dispatch('timediff', 'elementary')(timepart, first_timestamp, second_timestamp)) }} 4 | {%- endmacro %} 5 | 6 | {# For Snowflake, Databricks, Redshift, Postgres & Spark #} 7 | {# the dbt adapter 
implementation supports both timestamp and dates #} 8 | {% macro default__timediff(timepart, first_timestamp, second_timestamp) %} 9 | {{ elementary.edr_datediff(first_timestamp, second_timestamp, timepart)}} 10 | {% endmacro %} 11 | 12 | {% macro bigquery__timediff(timepart, first_timestamp, second_timestamp) %} 13 | timestamp_diff({{ second_timestamp }}, {{ first_timestamp }}, {{ timepart }}) 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/utils/cross_db_utils/to_char.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_to_char(column, format) -%} 2 | {{ return(adapter.dispatch('edr_to_char', 'elementary')(column, format)) }} 3 | {%- endmacro %} 4 | 5 | {# Snowflake and Redshift/Postgres #} 6 | {% macro default__edr_to_char(column, format) %} 7 | to_char({{ column }} {%- if format %}, '{{ format }}'){%- else %}, 'YYYY-MM-DD HH:MI:SS'){%- endif %} 8 | {% endmacro %} 9 | 10 | {% macro bigquery__edr_to_char(column, format) %} 11 | cast({{ column }} as STRING {%- if format %} FORMAT '{{ format }}'){%- else %}){%- endif %} 12 | {% endmacro %} 13 | 14 | {% macro spark__edr_to_char(column, format) %} 15 | date_format({{ column }} {%- if format %}, '{{ format }}'){%- else %}, 'YYYY-MM-DD HH:MI:SS'){%- endif %} 16 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/data_types/data_size.sql: -------------------------------------------------------------------------------- 1 | {% macro get_column_size() %} 2 | {{ return(adapter.dispatch('get_column_size', 'elementary')()) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_column_size() %} 6 | {{ return(elementary.get_config_var('long_string_size')) }} 7 | {% endmacro %} 8 | 9 | {% macro postgres__get_column_size() %} 10 | {{ return(none) }} 11 | {% endmacro %} 12 | 13 | {% macro snowflake__get_column_size() %} 14 | {{ return(16777216) }} 15 | {% 
endmacro %} 16 | 17 | {% macro bigquery__get_column_size() %} 18 | {{ return(10485760) }} 19 | {% endmacro %} 20 | 21 | {% macro redshift__get_column_size() %} 22 | {{ return(65535) }} 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /macros/utils/data_types/get_column_data_type.sql: -------------------------------------------------------------------------------- 1 | {% macro get_column_data_type(column_relation) %} 2 | {% set data_type = adapter.dispatch('get_column_data_type','elementary')(column_relation) %} 3 | {{ return(data_type) }} 4 | {% endmacro %} 5 | 6 | {% macro default__get_column_data_type(column_relation) %} 7 | {{return (column_relation["dtype"]) }} 8 | {% endmacro %} 9 | 10 | {% macro bigquery__get_column_data_type(column_relation) %} 11 | {{return (column_relation["data_type"]) }} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/utils/data_types/is_column_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro is_column_timestamp(table_relation,timestamp_column,timestamp_column_data_type) %} 2 | {%- if timestamp_column_data_type == 'string' %} 3 | {%- set is_timestamp = elementary.try_cast_column_to_timestamp(table_relation, timestamp_column) %} 4 | {%- elif timestamp_column_data_type == 'timestamp' %} 5 | {%- set is_timestamp = true %} 6 | {%- else %} 7 | {%- set is_timestamp = false %} 8 | {%- endif %} 9 | {{ return(is_timestamp) }} 10 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/data_types/normalize_data_type.sql: -------------------------------------------------------------------------------- 1 | {% macro normalize_data_type(data_type) %} 2 | 3 | {# In case data type has precision info - e.g. 
decimal is in the format decimal(p,s) #}
    {%- if '(' in data_type %}
        {%- set data_type = data_type.split('(')[0] %}
    {%- endif %}

    {%- if data_type is defined and data_type is not none %}
        {%- if data_type in elementary.data_type_list('string') %}
            {{ return('string') }}
        {%- elif data_type in elementary.data_type_list('numeric') %}
            {{ return('numeric') }}
        {%- elif data_type in elementary.data_type_list('timestamp') %}
            {{ return('timestamp') }}
        {%- elif data_type in elementary.data_type_list("boolean") %}
            {{ return("boolean") }}
        {%- else %}
            {{ return('other') }}
        {% endif %}
    {%- endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/data_types/null_as.sql:
--------------------------------------------------------------------------------
{# Typed NULL literals; Clickhouse needs an explicit Nullable() wrapper. #}
{%- macro null_int() -%}
    {{ return(adapter.dispatch('null_int', 'elementary')()) }}
{%- endmacro -%}

{%- macro default__null_int() -%}
    cast(null as {{ elementary.edr_type_int() }})
{%- endmacro -%}

{%- macro clickhouse__null_int() -%}
    cast(null as Nullable({{ elementary.edr_type_int() }}))
{%- endmacro -%}

{%- macro null_timestamp() -%}
    {{ return(adapter.dispatch('null_timestamp', 'elementary')()) }}
{%- endmacro -%}

{%- macro default__null_timestamp() -%}
    cast(null as {{ elementary.edr_type_timestamp() }})
{%- endmacro -%}

{%- macro clickhouse__null_timestamp() -%}
    cast(null as Nullable({{ elementary.edr_type_timestamp() }}))
{%- endmacro -%}

{%- macro null_float() -%}
    cast(null as {{ elementary.edr_type_float() }})
{%- endmacro -%}

{% macro null_string() %}
    {{ return(adapter.dispatch('null_string', 'elementary')()) }}
{% endmacro %}

{% macro default__null_string() %}
    cast(null as {{ elementary.edr_type_string() }})
{% endmacro %}

{% macro clickhouse__null_string() %}
    cast(null as Nullable({{ elementary.edr_type_string() }}))
{% endmacro %}

{% macro null_boolean() %}
    cast(null as {{ elementary.edr_type_bool() }})
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/data_types/to_primitive.sql:
--------------------------------------------------------------------------------
{# Recursively converts a value into JSON-serializable primitives; anything else becomes a string. #}
{% macro to_primitive(val) %}
    {% if elementary.is_primitive(val) %}
        {% do return(val) %}

    {% elif val is mapping %}
        {% set new_dict = {} %}
        {% for k, v in val.items() %}
            {% do new_dict.update({k: elementary.to_primitive(v)}) %}
        {% endfor %}
        {% do return(new_dict) %}

    {% elif val is iterable %}
        {% set new_list = [] %}
        {% for item in val %}
            {% do new_list.append(elementary.to_primitive(item)) %}
        {% endfor %}
        {% do return(new_list) %}

    {% else %}
        {% do return(val | string) %}
    {% endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/data_types/try_cast_column_to_timestamp.sql:
--------------------------------------------------------------------------------
{% macro try_cast_column_to_timestamp(table_relation, timestamp_column) %}
    {{ return(adapter.dispatch('try_cast_column_to_timestamp', 'elementary')(table_relation, timestamp_column)) }}
{%- endmacro %}

{% macro default__try_cast_column_to_timestamp(table_relation, timestamp_column) %}
    {# We try casting for Snowflake, Bigquery and Databricks as these support safe cast and the query will not fail if the cast fails #}
    {%- set query %}
        select {{ elementary.edr_safe_cast(timestamp_column, elementary.edr_type_timestamp()) }} as timestamp_column
        from {{ table_relation }}
        where {{ timestamp_column }} is not null
        limit 1
    {%- endset %}

    {%- set result = elementary.result_value(query) %}
    {%- if result is not none %}
        {{ return(true) }}
    {%- endif %}
    {{ return(false) }}

{% endmacro %}

{% macro postgres__try_cast_column_to_timestamp(table_relation, timestamp_column) %}
    {# Postgres has no safe cast - a failed cast would abort the query, so never probe. #}
    {{ return(false) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/dict_utils/insensitive_get_dict_value.sql:
--------------------------------------------------------------------------------
{# Looks up the key as given, then lowercased, then uppercased; falls back to default/none. #}
{% macro insensitive_get_dict_value(dict, key, default) -%}
    {% set value = elementary.safe_get_with_default(dict, key) %}
    {%- if value is not none -%}
        {{- return(value) -}}
    {%- endif %}
    {% set value = elementary.safe_get_with_default(dict, key.lower()) %}
    {%- if value is not none -%}
        {{- return(value) -}}
    {%- endif %}
    {% set value = elementary.safe_get_with_default(dict, key.upper()) %}
    {%- if value is not none -%}
        {{- return(value) -}}
    {%- endif %}
    {%- if default is defined -%}
        {{- return(default) -}}
    {% else %}
        {{ return(none) }}
    {% endif %}
{%- endmacro %}
--------------------------------------------------------------------------------
/macros/utils/dict_utils/safe_get_with_default_value.sql:
--------------------------------------------------------------------------------
{# dict.get that also treats undefined values as missing. #}
{% macro safe_get_with_default(dict, key, default) %}
    {% set value = dict.get(key) %}
    {% if value is defined and value is not none %}
        {{ return(value) }}
    {% endif %}
    {% if default is defined %}
        {{ return(default) }}
    {% endif %}
    {{ return(none) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/dict_utils/undefined_dict_keys_to_none.sql:
--------------------------------------------------------------------------------
{# Replaces undefined values in the dict with none, in place, and returns it. #}
{% macro undefined_dict_keys_to_none(dict) %}
    {% for key in dict %}
        {% if dict[key] is not defined %}
            {% do dict.update({key: none}) %}
        {% endif %}
    {% endfor %}
    {{ return(dict) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/cache.sql:
--------------------------------------------------------------------------------
{# Run-scoped key/value cache stored under graph["elementary"]. #}
{% macro set_cache(entry, val) %}
    {% do graph.setdefault("elementary", {}).update({entry: val}) %}
{% endmacro %}

{% macro get_cache(entry, default=none) %}
    {% do return(graph.setdefault("elementary", {}).get(entry, default)) %}
{% endmacro %}

{% macro setdefault_cache(entry, default=none) %}
    {% do return(graph.setdefault("elementary", {}).setdefault(entry, default)) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/column_exists_in_relation.sql:
--------------------------------------------------------------------------------
{# Case-insensitive check for a column name in a relation. #}
{% macro column_exists_in_relation(relation, column_name) %}
    {% set columns = adapter.get_columns_in_relation(relation) %}
    {% for column in columns %}
        {% if column.name.lower() == column_name.lower() %}
            {% do return(true) %}
        {% endif %}
    {% endfor %}
    {% do return(false) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_compiled_code.sql:
--------------------------------------------------------------------------------
{# Returns a node's compiled code; when destined for a column, oversized code is replaced by an error message. #}
{% macro get_compiled_code(node, as_column_value=false) %}
    {% set compiled_code = adapter.dispatch("get_compiled_code", "elementary")(node) %}

    {% set max_column_size = elementary.get_column_size() %}
    {% if as_column_value and max_column_size and compiled_code and compiled_code | length > max_column_size %}
        {% do return(elementary.get_compiled_code_too_long_err_msg()) %}
    {% endif %}

    {% do return(compiled_code) %}
{% endmacro %}

{% macro default__get_compiled_code(node) %}
    {# 'compiled_sql' is the older field name kept for backwards compatibility. #}
    {% do return(node.get('compiled_code') or node.get('compiled_sql')) %}
{% endmacro %}

{% macro redshift__get_compiled_code(node) %}
    {% set compiled_code = node.get('compiled_code') or node.get('compiled_sql') %}
    {% if not compiled_code %}
        {% do return(none) %}
    {% else %}
        {# Double '%' so it is not interpreted as a parameter placeholder downstream. #}
        {% do return(compiled_code.replace("%", "%%")) %}
    {% endif %}
{% endmacro %}

{% macro get_compiled_code_too_long_err_msg() %}
    {% do return("Compiled code is too long.") %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_elementary_config_from_node.sql:
--------------------------------------------------------------------------------
{# Merges the node's 'elementary' config from config.elementary, config.meta.elementary and meta.elementary (later wins). #}
{% macro get_elementary_config_from_node(node) %}
    {% set res = {} %}
    {% set node_config = node.get('config') %}
    {% if node_config %}
        {% set elementary_config = node.config.get('elementary') %}
        {% if elementary_config and elementary_config is mapping %}
            {% do res.update(elementary_config) %}
        {% endif %}
        {% set config_meta = node.config.get('meta') %}
        {% if config_meta and config_meta is mapping %}
            {% set elementary_config = config_meta.get('elementary') %}
            {% if elementary_config and elementary_config is mapping %}
                {% do res.update(elementary_config) %}
            {% endif %}
        {% endif %}
    {% endif %}
    {% set node_meta = node.get('meta') %}
    {% if node_meta and node_meta is mapping %}
        {% set elementary_config = node_meta.get('elementary') %}
        {% if elementary_config and elementary_config is mapping %}
            {% do res.update(elementary_config) %}
        {% endif %}
    {% endif %}
    {{ return(res) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_elementary_relation.sql:
--------------------------------------------------------------------------------
{# Resolves the relation of an elementary package model by identifier, honoring model aliases. #}
{% macro get_elementary_relation(identifier) %}
    {%- if execute %}
        {%- set identifier_node = elementary.get_node('model.elementary.' ~ identifier) %}
        {%- if identifier_node -%}
            {%- set identifier_alias = elementary.safe_get_with_default(identifier_node, 'alias', identifier) %}
            {% set elementary_database, elementary_schema = identifier_node.database, identifier_node.schema %}
        {%- else -%}
            {% set identifier_alias = identifier %}
            {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %}
        {%- endif -%}
        {# If the requested relation is the model currently being built, reuse 'this'. #}
        {% if this and this.database == elementary_database and this.schema == elementary_schema and this.identifier == identifier_alias %}
            {% do return(this) %}
        {% endif %}
        {% do return(adapter.get_relation(elementary_database, elementary_schema, identifier_alias)) %}
    {%- endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_model_database_and_schema_from_test_node.sql:
--------------------------------------------------------------------------------
{# Derives the tested model's database/schema by stripping the test's custom schema suffix. #}
{% macro get_model_database_and_schema_from_test_node(test_node) %}
    {% set test_database = test_node.get('database') %}
    {% set test_schema = test_node.get('schema') %}
    {% set config_dict = elementary.safe_get_with_default(test_node, 'config', {}) %}
    {% set test_schema_sufix = config_dict.get('schema') %}
    {% if test_schema and test_schema_sufix %}
        {% set test_schema = test_schema | replace('_' ~ test_schema_sufix, '') %}
    {% endif %}
    {{ return([test_database, test_schema]) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_node.sql:
--------------------------------------------------------------------------------
{% macro get_node(node_unique_id) %}
    {# First let's try to find it in regular nodes #}
    {%- set node = graph.nodes.get(node_unique_id) -%}
    {%- if not node -%}
        {# If not found let's try to find it in source nodes #}
        {%- set node = graph.sources.get(node_unique_id) -%}
    {%- endif -%}
    {{ return(node) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_node_by_name.sql:
--------------------------------------------------------------------------------
{# Finds a node by name, optionally filtered by resource_type; none when absent. #}
{% macro get_node_by_name(name, resource_type=none) %}
    {%- set nodes = elementary.get_nodes_from_graph() -%}
    {% for node in nodes %}
        {% if node.name == name and (resource_type is none or node.resource_type == resource_type) %}
            {% do return(node) %}
        {% endif %}
    {% endfor %}
    {{ return(none) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_node_execution_id.sql:
--------------------------------------------------------------------------------
{# Per-invocation unique id for a node: "<invocation_id>.<unique_id>". #}
{% macro get_node_execution_id(node) %}
    {% set node_execution_id = [invocation_id, node.get('unique_id')] | join('.') %}
    {{ return(node_execution_id) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_nodes_by_unique_ids.sql:
--------------------------------------------------------------------------------
{# Maps unique ids to graph nodes, silently skipping ids that do not resolve. #}
{% macro get_nodes_by_unique_ids(unique_ids) %}
    {% set nodes = []%}
    {% if execute %}
        {% if unique_ids and unique_ids is iterable %}
            {% for unique_id in unique_ids %}
                {% set node = elementary.get_node(unique_id) %}
                {% if node %}
                    {% do nodes.append(node) %}
                {% endif %}
            {% endfor %}
        {% endif %}
    {% endif %}
    {{ return(nodes) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_nodes_from_graph.sql:
--------------------------------------------------------------------------------
{# All graph nodes: sources first, then models/tests/etc. #}
{% macro get_nodes_from_graph() %}
    {% set nodes = [] %}
    {% do nodes.extend(graph.sources.values()) %}
    {% do nodes.extend(graph.nodes.values()) %}
    {{ return(nodes) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_package_database_and_schema.sql:
--------------------------------------------------------------------------------
{# Locates the database/schema a package's models are built in, via any model of that package. #}
{% macro get_package_database_and_schema(package_name='elementary') %}
    {% do return(adapter.dispatch('get_package_database_and_schema', 'elementary')(package_name)) %}
{% endmacro %}

{% macro default__get_package_database_and_schema(package_name='elementary') %}
    {% if execute %}
        {% set node_in_package = graph.nodes.values()
            | selectattr("resource_type", "==", "model")
            | selectattr("package_name", "==", package_name) | first %}
        {% if node_in_package %}
            {{ return([node_in_package.database, node_in_package.schema]) }}
        {% endif %}
    {% endif %}
    {{ return([none, none]) }}
{% endmacro %}

{% macro clickhouse__get_package_database_and_schema(package_name='elementary') %}
    {% if execute %}
        {% set node_in_package = graph.nodes.values()
            | selectattr("resource_type", "==", "model")
            | selectattr("package_name", "==", package_name) | first %}
        {% if node_in_package %}
            {# Clickhouse has no database level - return the schema for both slots. #}
            {{ return([node_in_package.schema, node_in_package.schema]) }}
        {% endif %}
    {% endif %}
    {{ return([none, none]) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_parent_model_unique_ids_from_test_node.sql:
--------------------------------------------------------------------------------
{# Returns the test's parent nodes that belong to the test's own package. #}
{% macro get_parent_model_unique_ids_from_test_node(test_node) %}
    {% set nodes_in_current_package = [] %}
    {% set test_depends_on =
test_node.get('depends_on') %}
    {% if test_depends_on %}
        {% set depends_on_nodes = test_depends_on.get('nodes') %}
        {% if depends_on_nodes %}
            {% set current_package_name = test_node.get('package_name') %}
            {% if current_package_name %}
                {# Unique ids look like "model.<package>.<name>" - match on ".<package>." #}
                {% set current_package_name = '.' ~ current_package_name ~ '.' %}
                {% for node in depends_on_nodes %}
                    {% if current_package_name in node %}
                        {% do nodes_in_current_package.append(node) %}
                    {% endif %}
                {% endfor %}
            {% endif %}
        {% endif %}
    {% endif %}
    {{ return(nodes_in_current_package) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_relation_from_node.sql:
--------------------------------------------------------------------------------
{# Resolves a node to its adapter relation, using identifier/alias/name precedence for the table name. #}
{% macro get_relation_from_node(node) %}
    {% do return(adapter.get_relation(database=node.database,
                                      schema=node.schema,
                                      identifier=elementary.get_table_name_from_node(node))) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_relevant_databases.sql:
--------------------------------------------------------------------------------
{# Unique list of databases referenced by models/sources, always including the target database. #}
{% macro get_relevant_databases() %}
    {% set database_names = [target.database] %}
    {% set models = graph.nodes.values() | selectattr('resource_type', '==', 'model') | list %}
    {% set sources = graph.sources.values() | selectattr('resource_type', '==', 'source') | list %}
    {% set nodes = models + sources %}
    {% for node in nodes %}
        {% set database_name = node.get('database') %}
        {% if database_name %}
            {% do database_names.append(database_name) %}
        {% endif %}
    {% endfor %}
    {% set unique_database_names = database_names | unique | list %}
    {% do return(unique_database_names) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_rendered_ref.sql:
--------------------------------------------------------------------------------
{# Renders a "ref('...')" string into the referenced relation; other strings pass through lowercased. #}
{% macro get_rendered_ref(ref_string) %}
    {% set lowered_ref_string = ref_string | lower %}
    {# FIX: the named group lost its name in this copy ("(?P.+)" is an invalid regex);
       restored to 'ref_identifier', the key read from groupdict() below. #}
    {% set match = modules.re.match("(ref\('(?P<ref_identifier>.+)'\))", lowered_ref_string, modules.re.IGNORECASE) %}
    {% if not match %}
        {% do return(lowered_ref_string) %}
    {% else %}
        {% do return(ref(match.groupdict()['ref_identifier'])['include']()) %}
    {% endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_result_node.sql:
--------------------------------------------------------------------------------
{# Finds a node in the run results by identifier, optionally restricted to a package. #}
{% macro get_result_node(identifier, package_name='elementary') %}
    {% for result in results %}
        {% if result.node.identifier == identifier %}
            {% if package_name %}
                {% if result.node.package_name == package_name %}
                    {{ return(result.node) }}
                {% endif %}
            {% else %}
                {{ return(result.node) }}
            {% endif %}
        {% endif %}
    {% endfor %}
    {{ return(none) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_run_result_dict.sql:
--------------------------------------------------------------------------------
{# Serializes a RunResult to a dict; dbt >= 1.8 is built manually because to_dict() raises there. #}
{% macro get_run_result_dict(run_result) %}
    {% set major, minor, revision = dbt_version.split(".") %}
    {% set major = major | int %}
    {% set minor = minor | int %}
    {% if major < 1 or major == 1 and minor < 8 %}
        {% do return(run_result.to_dict()) %}
    {% else %}
        {# There's a bug in dbt 1.8 which causes run_result.to_dict to fail on an exception #}
        {% set timing_dicts = [] %}
        {% if run_result.timing %}
            {% for item in run_result.timing %}
                {% do timing_dicts.append(item.to_dict()) %}
            {% endfor %}
        {% endif %}

        {% do return({
            'status': run_result.status,
            'message': run_result.message,
            'adapter_response': run_result.adapter_response,
            'failures': run_result.failures,
            'execution_time': run_result.execution_time,
            'timing': timing_dicts,
            'node': run_result.node.to_dict() if run_result.node else None,
            'thread_id': run_result.thread_id
        }) %}
    {% endif %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/get_table_name_from_node.sql:
--------------------------------------------------------------------------------
{# Physical table name of a node: identifier, then alias, then name. #}
{% macro get_table_name_from_node(node) %}
    {% if node.identifier %}
        {% set table_name = node.identifier %}
    {% elif node.alias %}
        {% set table_name = node.alias %}
    {% else %}
        {% set table_name = node.name %}
    {% endif %}
    {{ return(table_name) }}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/is_ephemeral_model.sql:
--------------------------------------------------------------------------------
{# Ephemeral models compile to CTEs rather than physical relations. #}
{% macro is_ephemeral_model(model_relation) %}
    {% do return(
        model_relation.is_cte
    ) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/is_incremental_model.sql:
--------------------------------------------------------------------------------
{# True for incremental models; optionally treats sources as incremental too. #}
{% macro is_incremental_model(model_node, source_included=false) %}
    {% do return(
        (source_included and model_node.resource_type == "source")
        or
        (model_node.resource_type == "model" and model_node.config.materialized == "incremental")
    ) %}
{% endmacro %}
--------------------------------------------------------------------------------
/macros/utils/graph/set_cache.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/macros/utils/graph/set_cache.sql -------------------------------------------------------------------------------- /macros/utils/list_utils/filter_none_and_sort.sql: -------------------------------------------------------------------------------- 1 | {% macro filter_none_and_sort(val) %} 2 | {% do return(val | reject("none") | sort) %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/utils/list_utils/join_list.sql: -------------------------------------------------------------------------------- 1 | {% macro join_list(item_list, separator) %} 2 | {{ return(item_list | join(separator)) }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/utils/list_utils/lists_intersection.sql: -------------------------------------------------------------------------------- 1 | {% macro lists_intersection(list_a, list_b) %} 2 | {% set new_list = [] %} 3 | {% for item in list_a %} 4 | {% if item in list_b %} 5 | {% do new_list.append(item) %} 6 | {% endif %} 7 | {% endfor %} 8 | {% do return(new_list) %} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/utils/list_utils/strings_list_to_tuple.sql: -------------------------------------------------------------------------------- 1 | {% macro strings_list_to_tuple(strings) %} 2 | {%- if strings %} 3 | ({% for string in strings %} '{{ string }}' {{ "," if not loop.last else "" }} {% endfor %}) 4 | {%- else %} 5 | ('') 6 | {%- endif %} 7 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/list_utils/union_lists.sql: -------------------------------------------------------------------------------- 1 | {% macro union_lists(list1, list2) %} 2 | {% set union_list = [] %} 3 | {% do 
union_list.extend(list1) %} 4 | {% do union_list.extend(list2) %} 5 | {{ return(union_list | unique | list) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /macros/utils/log_macro_results.sql: -------------------------------------------------------------------------------- 1 | {% macro log_macro_results(macro_name, macro_args=none) %} 2 | {% if macro_args is none %} 3 | {% set macro_args = {} %} 4 | {% endif %} 5 | {%- set package_and_macro_name = macro_name.split('.') %} 6 | {%- if package_and_macro_name | length == 1 %} 7 | {% set macro = context[macro_name] %} 8 | {%- elif package_and_macro_name | length == 2 %} 9 | {%- set package_name, macro_name = package_and_macro_name %} 10 | {% set macro = context[package_name][macro_name] %} 11 | {%- else %} 12 | {% do exceptions.raise_compiler_error("Received invalid macro name: {}".format(macro_name)) %} 13 | {% endif %} 14 | {% set results = macro(**macro_args) %} 15 | {% if results is not none %} 16 | {% do elementary.edr_log('--ELEMENTARY-MACRO-OUTPUT-START--' ~ tojson(results) ~ '--ELEMENTARY-MACRO-OUTPUT-END--') %} 17 | {% endif %} 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /macros/utils/missing_elementary_models.sql: -------------------------------------------------------------------------------- 1 | {% macro get_missing_elementary_models_err_msg() %} 2 | {% set elementary_db, elementary_schema = elementary.get_package_database_and_schema() %} 3 | {% do return("Missing Elementary models in '{}.{}'. 
Please run 'dbt run -s elementary --target {}'.".format(elementary_db, elementary_schema, target.name)) %} 4 | {% endmacro %} 5 | 6 | {% macro warn_missing_elementary_models() %} 7 | {% do exceptions.warn(elementary.get_missing_elementary_models_err_msg()) %} 8 | {% endmacro %} 9 | 10 | {% macro raise_missing_elementary_models() %} 11 | {% do exceptions.raise_compiler_error(elementary.get_missing_elementary_models_err_msg()) %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/utils/percent_query.sql: -------------------------------------------------------------------------------- 1 | {% macro edr_percent(value, total) %} 2 | {% set value = elementary.edr_cast_as_float(value) %} 3 | {% set total = elementary.edr_cast_as_float(total) %} 4 | {% set query %} 5 | round({{ elementary.edr_cast_as_numeric('{} / nullif({}, 0) * 100.0'.format(value, total)) }}, 3) 6 | {% endset %} 7 | {% do return(query) %} 8 | {% endmacro %} 9 | 10 | {% macro edr_not_percent(value, total) %} 11 | {% set value = elementary.edr_cast_as_float(value) %} 12 | {% set total = elementary.edr_cast_as_float(total) %} 13 | {% set query %} 14 | round({{ elementary.edr_cast_as_numeric('100 - ({} / nullif({}, 0) * 100.0)'.format(value, total)) }}, 3) 15 | {% endset %} 16 | {% do return(query) %} 17 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/run_queries/agate_to_dicts.sql: -------------------------------------------------------------------------------- 1 | {% macro agate_to_dicts(agate_table) %} 2 | {% set column_types = agate_table.column_types %} 3 | {% set serializable_rows = [] %} 4 | {% for agate_row in agate_table.rows %} 5 | {% set serializable_row = {} %} 6 | {% for col_name, col_value in agate_row.items() %} 7 | {% set serializable_col_value = column_types[loop.index0].jsonify(col_value) %} 8 | {% set serializable_col_name = col_name | lower %} 9 | {% do 
serializable_row.update({serializable_col_name: serializable_col_value}) %} 10 | {% endfor %} 11 | {% do serializable_rows.append(serializable_row) %} 12 | {% endfor %} 13 | {{ return(serializable_rows) }} 14 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/run_queries/agate_to_json.sql: -------------------------------------------------------------------------------- 1 | {% macro agate_to_json(agate_table) %} 2 | {% set serializable_rows = elementary.agate_to_dicts(agate_table) %} 3 | {{ return(tojson(serializable_rows)) }} 4 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/run_queries/render_run_query.sql: -------------------------------------------------------------------------------- 1 | {% macro render_run_query(prerendered_query) %} 2 | {% set results = elementary.run_query(render(prerendered_query)) %} 3 | {% do return(elementary.agate_to_dicts(results)) %} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /macros/utils/run_queries/result_column_to_list.sql: -------------------------------------------------------------------------------- 1 | {% macro result_column_to_list(single_column_query) %} 2 | {% set query_result = elementary.run_query(single_column_query) %} 3 | {% do return(query_result.columns[0]) %} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /macros/utils/run_queries/result_value.sql: -------------------------------------------------------------------------------- 1 | {% macro result_value(single_column_query) %} 2 | {% set result = elementary.run_query(single_column_query) %} 3 | {% if not result %} 4 | {% do return(none) %} 5 | {% endif %} 6 | {% do return(result[0][0]) %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- 
/macros/utils/run_queries/run_query.sql: -------------------------------------------------------------------------------- 1 | {% macro run_query(query, lowercase_column_names=True) %} {# Wrapper around dbt.run_query; by default renames every column of the result table to lowercase so callers get consistent column access across adapters that differ in identifier casing. #} 2 | {% set query_result = dbt.run_query(query) %} 3 | {% if lowercase_column_names %} 4 | {% set lowercased_column_names = {} %} 5 | {% for column_name in query_result.column_names %} 6 | {% do lowercased_column_names.update({column_name: column_name.lower()}) %} 7 | {% endfor %} 8 | {# agate Table.rename takes a mapping of old name -> new name #} 9 | {% set query_result = query_result.rename(lowercased_column_names) %} 10 | {% endif %} 11 | {% do return(query_result) %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/utils/run_queries/union_macro_queries.sql: -------------------------------------------------------------------------------- 1 | {% macro union_macro_queries(param_list, query_macro) %} {# Renders query_macro(param) for each param and joins the parenthesized sub-queries with `union all`. #} 2 | {% for param in param_list %} 3 | ({{ query_macro(param) }}) 4 | {% if not loop.last %} 5 | union all 6 | {% endif %} 7 | {% endfor %} 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /macros/utils/sql_utils/escape_select.sql: -------------------------------------------------------------------------------- 1 | {% macro escape_select(column_names) %} {# Dispatched: renders a comma-separated SELECT column list. The Redshift override wraps each name in double quotes — presumably to survive reserved words / mixed case; confirm against callers. #} 2 | {% do return(adapter.dispatch('escape_select', 'elementary')(column_names)) %} 3 | {% endmacro %} 4 | 5 | {% macro default__escape_select(column_names) %} 6 | {% do return(column_names | join(',')) %} 7 | {% endmacro %} 8 | 9 | {% macro redshift__escape_select(column_names) %} 10 | {% do return('\"' + column_names | join('\", \"') + '\"') %} 11 | {% endmacro %} 12 | -------------------------------------------------------------------------------- /macros/utils/sql_utils/list_concat_with_separator.sql: -------------------------------------------------------------------------------- 1 | {% macro list_concat_with_separator(item_list, separator, handle_nulls = true) %} 2 | {% set
new_list = [] %} 3 | {% for item in item_list %} 4 | {% set new_item = elementary.edr_quote(item) %} 5 | {% if handle_nulls %} 6 | {% set new_item = "case when " ~ elementary.edr_cast_as_string(item) ~ " is null then 'NULL' else " ~ elementary.edr_cast_as_string(item) ~ " end" %} 7 | {% endif %} 8 | {% do new_list.append(new_item) %} 9 | {% if not loop.last %} 10 | {% do new_list.append(elementary.edr_quote(separator)) %} 11 | {% endif %} 12 | {% endfor %} 13 | {{ return(elementary.join_list(new_list, " || ")) }} 14 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/sql_utils/min_max.sql: -------------------------------------------------------------------------------- 1 | {% macro arithmetic_max(val1, val2) %} 2 | (0.5 * ({{ val1 }} + {{ val2 }} + abs({{ val1 }} - {{ val2 }}))) 3 | {% endmacro %} 4 | 5 | {% macro arithmetic_min(val1, val2) %} 6 | (0.5 * ({{ val1 }} + {{ val2 }} - abs({{ val1 }} - {{ val2 }}))) 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/utils/sql_utils/to_sql_list.sql: -------------------------------------------------------------------------------- 1 | {% macro to_sql_list(ls) %} 2 | {% set rendered_items = [] %} 3 | {% for item in ls %} 4 | {% do rendered_items.append(elementary.render_value(item)) %} 5 | {% endfor %} 6 | 7 | ({{ rendered_items | join(', ') }}) 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /macros/utils/table_operations/create_intermediate_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro create_intermediate_relation(base_relation, rows, temporary, like_columns=none) %} 2 | {% set int_relation = elementary.edr_make_intermediate_relation(base_relation).incorporate(type='table') %} 3 | 4 | {% if not elementary.has_temp_table_support() %} 5 | {% set temporary = false %} 6 | {% endif %} 7 | 8 | 
{% do elementary.create_table_like(int_relation, base_relation, temporary, like_columns) %} 9 | {% do elementary.insert_rows(int_relation, rows, should_commit=false, chunk_size=elementary.get_config_var('dbt_artifacts_chunk_size')) %} 10 | {% do return(int_relation) %} 11 | {% endmacro %} 12 | 13 | {% macro edr_make_intermediate_relation(base_relation) %} 14 | {% do return(adapter.dispatch("edr_make_intermediate_relation", "elementary")(base_relation)) %} 15 | {% endmacro %} 16 | 17 | {% macro default__edr_make_intermediate_relation(base_relation) %} 18 | {% do return(elementary.make_temp_table_relation(base_relation)) %} 19 | {% endmacro %} 20 | 21 | {% macro databricks__edr_make_intermediate_relation(base_relation) %} 22 | {% set tmp_identifier = elementary.table_name_with_suffix(base_relation.identifier, elementary.get_timestamped_table_suffix()) %} 23 | {% set tmp_relation = api.Relation.create( 24 | identifier=tmp_identifier, 25 | schema=base_relation.schema, 26 | database=base_relation.database, 27 | type='table') %} 28 | {% do return(tmp_relation) %} 29 | {% endmacro %} 30 | -------------------------------------------------------------------------------- /macros/utils/table_operations/create_table_like.sql: -------------------------------------------------------------------------------- 1 | {% macro create_table_like(relation, like_relation, temporary=False, like_columns=none) %} 2 | {% set empty_table_query %} 3 | SELECT 4 | {% if like_columns %} 5 | {% for column in like_columns %} 6 | {{ column }}{{ ", " if not loop.last }} 7 | {% endfor %} 8 | {% else %} 9 | * 10 | {% endif %} 11 | FROM {{ like_relation }} 12 | WHERE 1 = 0 13 | {% endset %} 14 | {% do elementary.run_query(dbt.create_table_as(temporary, relation, empty_table_query)) %} 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /macros/utils/table_operations/delete_if_incremental.sql: 
-------------------------------------------------------------------------------- 1 | {% macro delete_if_incremental(where_clause) %} 2 | 3 | {% set query%} 4 | delete from {{ this }} 5 | where {{ where_clause }} 6 | {% endset %} 7 | 8 | {% if is_incremental() %} 9 | {% do elementary.run_query(query) %} 10 | {% endif %} 11 | 12 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/fully_drop_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro fully_drop_relation(relation) %} 2 | {{ return(adapter.dispatch('fully_drop_relation', 'elementary')(relation)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__fully_drop_relation(relation) %} 6 | {% do adapter.drop_relation(relation) %} 7 | {% endmacro %} 8 | 9 | {% macro athena__fully_drop_relation(relation) %} 10 | {% do adapter.clean_up_table(relation) %} 11 | {% do adapter.drop_relation(relation) %} 12 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/get_column_in_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro get_column_in_relation(relation, column_name) %} 2 | {% set columns = adapter.get_columns_in_relation(relation) %} 3 | {% for column in columns %} 4 | {% if column.name == column_name %} 5 | {% do return(column) %} 6 | {% endif %} 7 | {% endfor %} 8 | {% do return(none) %} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/utils/table_operations/get_columns_and_types.sql: -------------------------------------------------------------------------------- 1 | {% macro get_columns_and_types(table_name, schema_name = none, database_name = none) %} 2 | 3 | {# dbt models can be found with identifier only #} 4 | {# for non-dbt tables database_name and schema_name are required #} 5 | 6 | {%- if not 
database_name %} 7 | {%- set database_name = elementary.target_database() %} 8 | {%- endif %} 9 | {%- if not schema_name %} 10 | {%- set schema_name = target.schema %} 11 | {%- endif %} 12 | 13 | {%- set columns = [] %} 14 | 15 | {%- set relation = adapter.get_relation( 16 | database=database_name, 17 | schema=schema_name, 18 | identifier=table_name) -%} 19 | 20 | {%- set columns_from_relation = adapter.get_columns_in_relation(relation) -%} 21 | 22 | {% for column in columns_from_relation %} 23 | {%- set column_item = {'column_name': column['column'], 'data_type': elementary.normalize_data_type(elementary.get_column_data_type(column))} %} 24 | {%- do columns.append(column_item) -%} 25 | {% endfor %} 26 | 27 | {{ return(columns) }} 28 | 29 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/get_relation_max_length.sql: -------------------------------------------------------------------------------- 1 | {# We create tables and some databases limit the length of table names #} 2 | {% macro get_relation_max_name_length() %} 3 | {{ return(adapter.dispatch('get_relation_max_name_length', 'elementary')()) }} 4 | {% endmacro %} 5 | 6 | {% macro default__get_relation_max_name_length(temporary, relation, sql_query) %} 7 | {{ return(none) }} 8 | {% endmacro %} 9 | 10 | {% macro snowflake__get_relation_max_name_length(temporary, relation, sql_query) %} 11 | {{ return(255) }} 12 | {% endmacro %} 13 | 14 | {% macro redshift__get_relation_max_name_length(temporary, relation, sql_query) %} 15 | {{ return(125) }} 16 | {% endmacro %} 17 | 18 | {% macro postgres__get_relation_max_name_length(temporary, relation, sql_query) %} 19 | {{ return(63) }} 20 | {% endmacro %} 21 | 22 | {% macro spark__get_relation_max_name_length(temporary, relation, sql_query) %} 23 | {{ return(127) }} 24 | {% endmacro %} 25 | 26 | {% macro athena__get_relation_max_name_length(temporary, relation, sql_query) %} 27 | {{ return(255) 
}} 28 | {% endmacro %} 29 | 30 | {% macro trino__get_relation_max_name_length(temporary, relation, sql_query) %} 31 | {{ return(128) }} 32 | {% endmacro %} 33 | 34 | {% macro clickhouse__get_relation_max_name_length(temporary, relation, sql_query) %} 35 | {{ return(128) }} 36 | {% endmacro %} 37 | -------------------------------------------------------------------------------- /macros/utils/table_operations/get_row_count.sql: -------------------------------------------------------------------------------- 1 | {% macro get_row_count(full_table_name) %} {# Returns count(*) of full_table_name; returns none during the parse phase (execute == false). #} 2 | {% set result = none %} 3 | {% set query_row_count %} 4 | select count(*) from {{ full_table_name }} 5 | {% endset %} 6 | {% if execute %} 7 | {% set result = elementary.run_query(query_row_count).columns[0].values()[0] %} 8 | {% endif %} 9 | {{ return(result) }} 10 | 11 | {# BUGFIX: was `{% endmacro %}}` — the stray trailing brace leaked into the macro's rendered output #} {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/get_timestamped_table_suffix.sql: -------------------------------------------------------------------------------- 1 | {% macro get_timestamped_table_suffix() %} {# Unique temp-table suffix derived from the current UTC timestamp, microsecond precision. #} 2 | {% do return(modules.datetime.datetime.utcnow().strftime('__tmp_%Y%m%d%H%M%S%f')) %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/utils/table_operations/has_temp_table_support.sql: -------------------------------------------------------------------------------- 1 | {% macro has_temp_table_support() %} {# Dispatched capability flag: adapters that cannot create temporary tables override this to false. #} 2 | {% do return(adapter.dispatch("has_temp_table_support", "elementary")()) %} 3 | {% endmacro %} 4 | 5 | {% macro default__has_temp_table_support() %} 6 | {% do return(true) %} 7 | {% endmacro %} 8 | 9 | {% macro spark__has_temp_table_support() %} 10 | {% do return(false) %} 11 | {% endmacro %} 12 | 13 | {% macro trino__has_temp_table_support() %} 14 | {% do return(false) %} 15 | {% endmacro %} 16 | 17 | {% macro athena__has_temp_table_support() %} 18 | {% do return(false) %} 19 | {% endmacro %} 20 | 21 | {% macro
clickhouse__has_temp_table_support() %} 22 | {% do return(false) %} 23 | {% endmacro %} 24 | 25 | -------------------------------------------------------------------------------- /macros/utils/table_operations/insert_as_select.sql: -------------------------------------------------------------------------------- 1 | {% macro insert_as_select(table_relation, select_query) %} 2 | {# when calling this macro, you need to add depends on ref comment #} 3 | {# ref_model and select_query need to have the same columns #} 4 | 5 | {%- set insert_query %} 6 | insert into {{ table_relation }} 7 | with tmp_table as ( 8 | {{ select_query }} 9 | ) 10 | select * from tmp_table 11 | {%- endset %} 12 | 13 | {{ return(insert_query) }} 14 | 15 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/relation_exists.sql: -------------------------------------------------------------------------------- 1 | {% macro relation_exists(relation) %} 2 | {%- set loaded_relation = load_relation(relation) -%} 3 | {% if loaded_relation is not none %} 4 | {{ return(True) }} 5 | {% endif %} 6 | {{ return(False) }} 7 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/remove_rows.sql: -------------------------------------------------------------------------------- 1 | {% macro remove_rows(table_name) %} 2 | 3 | {% set remove_rows_query %} 4 | delete from {{ table_name }} 5 | {% endset %} 6 | {% do elementary.run_query(remove_rows_query) %} 7 | 8 | {% endmacro %} -------------------------------------------------------------------------------- /macros/utils/table_operations/table_name_with_suffix.sql: -------------------------------------------------------------------------------- 1 | {% macro table_name_with_suffix(table_name, suffix) %} 2 | {% set relation_max_name_length = elementary.get_relation_max_name_length() %} 3 | {% if relation_max_name_length %} 
4 | {% set suffix_length = suffix | length %} 5 | {% set table_name_with_suffix = table_name[:relation_max_name_length - suffix_length] ~ suffix %} 6 | {% else %} 7 | {% set table_name_with_suffix = table_name ~ suffix %} 8 | {% endif %} 9 | {{ return(table_name_with_suffix) }} 10 | {% endmacro %} 11 | -------------------------------------------------------------------------------- /models/alerts_views.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: alerts_dbt_models 5 | description: > 6 | A view that is used by the Elementary CLI to generate models alerts, including all the fields the alert will include such as owner, tags, error message, etc. 7 | It joins data about models and snapshots run results, and filters alerts according to configuration. 8 | 9 | - name: alerts_dbt_tests 10 | description: > 11 | A view that is used by the Elementary CLI to generate dbt tests alerts, including all the fields the alert will include such as owner, tags, error message, etc. 12 | This view includes data about all dbt tests except elementary tests. 13 | It filters alerts according to configuration. 14 | 15 | - name: alerts_anomaly_detection 16 | description: > 17 | A view that is used by the Elementary CLI to generate alerts on data anomalies detected using the elementary anomaly detection tests. 18 | The view filters alerts according to configuration. 19 | 20 | - name: alerts_schema_changes 21 | description: > 22 | A view that is used by the Elementary CLI to generate alerts on schema changes detected using elementary tests. 23 | The view filters alerts according to configuration. 
24 | -------------------------------------------------------------------------------- /models/edr/alerts/alerts_anomaly_detection.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind =False 5 | ) 6 | }} 7 | 8 | with elementary_test_results as ( 9 | select * from {{ ref('elementary_test_results') }} 10 | ), 11 | 12 | alerts_anomaly_detection as ( 13 | select id as alert_id, 14 | data_issue_id, 15 | test_execution_id, 16 | test_unique_id, 17 | model_unique_id, 18 | detected_at, 19 | database_name, 20 | schema_name, 21 | table_name, 22 | column_name, 23 | test_type as alert_type, 24 | test_sub_type as sub_type, 25 | test_results_description as alert_description, 26 | owners, 27 | tags, 28 | test_results_query as alert_results_query, 29 | other, 30 | test_name, 31 | test_short_name, 32 | test_params, 33 | severity, 34 | status, 35 | result_rows 36 | from elementary_test_results 37 | where {{ not elementary.get_config_var('disable_test_alerts') }} and lower(status) != 'pass' {%- if elementary.get_config_var('disable_warn_alerts') -%} and lower(status) != 'warn' {%- endif -%} {%- if elementary.get_config_var('disable_skipped_test_alerts') -%} and lower(status) != 'skipped' {%- endif -%} and test_type = 'anomaly_detection' 38 | ) 39 | 40 | select * from alerts_anomaly_detection 41 | -------------------------------------------------------------------------------- /models/edr/alerts/alerts_dbt_source_freshness.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with results as ( 9 | select * from {{ ref('dbt_source_freshness_results') }} 10 | ), 11 | 12 | sources as ( 13 | select * from {{ ref('dbt_sources') }} 14 | ) 15 | 16 | select 17 | results.source_freshness_execution_id as alert_id, 18 | results.max_loaded_at, 19 | results.snapshotted_at, 20 | {{ 
elementary.edr_cast_as_timestamp("results.generated_at") }} as detected_at, 21 | results.max_loaded_at_time_ago_in_s, 22 | results.status, 23 | results.error, 24 | results.warn_after, 25 | results.error_after, 26 | results.filter, 27 | sources.unique_id, 28 | sources.database_name, 29 | sources.schema_name, 30 | sources.source_name, 31 | sources.identifier, 32 | sources.tags, 33 | sources.meta, 34 | sources.owner, 35 | sources.package_name, 36 | sources.path, 37 | -- These columns below are deprecated. We add them since this view 38 | -- was used to be loaded into an incremental model with those columns, their names were later changed 39 | -- and Databricks doesn't respect `on_schema_change = 'append_new_columns'` properly, as described here - 40 | -- https://docs.databricks.com/en/delta/update-schema.html#automatic-schema-evolution-for-delta-lake-merge 41 | results.error_after as freshness_error_after, 42 | results.warn_after as freshness_warn_after, 43 | results.filter as freshness_filter 44 | from results 45 | join sources on results.unique_id = sources.unique_id 46 | where {{ not elementary.get_config_var('disable_source_freshness_alerts') }} and lower(status) != 'pass' 47 | -------------------------------------------------------------------------------- /models/edr/alerts/alerts_dbt_tests.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with elementary_test_results as ( 9 | select * from {{ ref('elementary_test_results') }} 10 | ), 11 | 12 | alerts_dbt_tests as ( 13 | select id as alert_id, 14 | data_issue_id, 15 | test_execution_id, 16 | test_unique_id, 17 | model_unique_id, 18 | detected_at, 19 | database_name, 20 | schema_name, 21 | table_name, 22 | column_name, 23 | test_type as alert_type, 24 | test_sub_type as sub_type, 25 | test_results_description as alert_description, 26 | owners, 27 | tags, 28 | test_results_query as 
alert_results_query, 29 | other, 30 | test_name, 31 | test_short_name, 32 | test_params, 33 | severity, 34 | status, 35 | result_rows 36 | from elementary_test_results 37 | where {{ not elementary.get_config_var('disable_test_alerts') }} and lower(status) != 'pass' {% if elementary.get_config_var('disable_warn_alerts') %} and lower(status) != 'warn' {% endif %} {% if elementary.get_config_var('disable_skipped_test_alerts') %} and lower(status) != 'skipped' {% endif %} and test_type = 'dbt_test' 38 | ) 39 | 40 | select * from alerts_dbt_tests 41 | -------------------------------------------------------------------------------- /models/edr/alerts/alerts_schema_changes.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False, 5 | ) 6 | }} 7 | 8 | 9 | with elementary_test_results as ( 10 | select * from {{ ref('elementary_test_results') }} 11 | ), 12 | 13 | alerts_schema_changes as ( 14 | select id as alert_id, 15 | data_issue_id, 16 | test_execution_id, 17 | test_unique_id, 18 | model_unique_id, 19 | detected_at, 20 | database_name, 21 | schema_name, 22 | table_name, 23 | column_name, 24 | test_type as alert_type, 25 | test_sub_type as sub_type, 26 | test_results_description as alert_description, 27 | owners, 28 | tags, 29 | test_results_query as alert_results_query, 30 | other, 31 | test_name, 32 | test_short_name, 33 | test_params, 34 | severity, 35 | status, 36 | result_rows 37 | from elementary_test_results 38 | where {{ not elementary.get_config_var('disable_test_alerts') }} and lower(status) != 'pass' {%- if elementary.get_config_var('disable_warn_alerts') -%} and lower(status) != 'warn' {%- endif -%} {%- if elementary.get_config_var('disable_skipped_test_alerts') -%} and lower(status) != 'skipped' {%- endif -%} and test_type = 'schema_change' 39 | ) 40 | 41 | select * from alerts_schema_changes 42 | 
-------------------------------------------------------------------------------- /models/edr/data_monitoring/anomaly_detection/anomaly_threshold_sensitivity.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with metrics_anomaly_score as ( 9 | 10 | select * from {{ ref('metrics_anomaly_score') }} 11 | 12 | ), 13 | 14 | score_sensitivity as ( 15 | 16 | select 17 | full_table_name, 18 | column_name, 19 | metric_name, 20 | latest_metric_value, 21 | training_avg as metric_avg, 22 | training_stddev as metric_stddev, 23 | anomaly_score, 24 | case when abs(anomaly_score) >= 1.5 then true else false end as {{ elementary.edr_quote_column('is_anomaly_1_5') }}, 25 | case when abs(anomaly_score) >= 2 then true else false end as {{ elementary.edr_quote_column('is_anomaly_2') }}, 26 | case when abs(anomaly_score) >= 2.5 then true else false end as {{ elementary.edr_quote_column('is_anomaly_2_5') }}, 27 | case when abs(anomaly_score) >= 3 then true else false end as {{ elementary.edr_quote_column('is_anomaly_3') }}, 28 | case when abs(anomaly_score) >= 3.5 then true else false end as {{ elementary.edr_quote_column('is_anomaly_3_5') }}, 29 | case when abs(anomaly_score) >= 4 then true else false end as {{ elementary.edr_quote_column('is_anomaly_4') }}, 30 | case when abs(anomaly_score) >= 4.5 then true else false end as {{ elementary.edr_quote_column('is_anomaly_4_5') }} 31 | from metrics_anomaly_score 32 | where abs(anomaly_score) >= 1.5 33 | 34 | ) 35 | 36 | select * from score_sensitivity 37 | -------------------------------------------------------------------------------- /models/edr/data_monitoring/data_monitoring/data_monitoring_metrics.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | unique_key='id', 5 | on_schema_change='append_new_columns', 6 | indexes=[{'columns': 
['full_table_name', "column_name", "metric_name"]}] if target.type == "postgres" else [], 7 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 8 | meta={ 9 | "timestamp_column": "created_at", 10 | "prev_timestamp_column": "updated_at", 11 | }, 12 | table_type=elementary.get_default_table_type(), 13 | incremental_strategy=elementary.get_default_incremental_strategy(), 14 | ) 15 | }} 16 | 17 | {{ elementary.empty_data_monitoring_metrics() }} 18 | -------------------------------------------------------------------------------- /models/edr/data_monitoring/schema_changes/schema_columns_snapshot.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | unique_key = 'column_state_id', 5 | on_schema_change = 'append_new_columns', 6 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 7 | meta={ 8 | "timestamp_column": "created_at", 9 | "prev_timestamp_column": "detected_at", 10 | }, 11 | table_type=elementary.get_default_table_type(), 12 | incremental_strategy=elementary.get_default_incremental_strategy() 13 | ) 14 | }} 15 | 16 | {{ elementary.empty_schema_columns_snapshot() }} 17 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_artifacts_hashes.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | {% set artifact_models = [ 9 | "dbt_models", 10 | "dbt_tests", 11 | "dbt_sources", 12 | "dbt_snapshots", 13 | "dbt_metrics", 14 | "dbt_exposures", 15 | "dbt_seeds", 16 | "dbt_columns", 17 | "dbt_groups", 18 | ] %} 19 | 20 | {% for artifact_model in artifact_models %} 21 | select 22 | '{{ artifact_model }}' as artifacts_model, 23 | metadata_hash 24 | from {{ ref(artifact_model) }} 25 | {% if not loop.last %} union all {% endif %} 26 | {% endfor %} 27 | order by metadata_hash 28 | 
-------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_columns.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_columns() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_columns_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_exposures.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_exposures() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_exposures_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_groups.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_groups() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ 
elementary.get_dbt_groups_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_invocations.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | transient=False, 5 | unique_key = 'invocation_id', 6 | on_schema_change = 'append_new_columns', 7 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 8 | meta={ 9 | "timestamp_column": "created_at", 10 | "prev_timestamp_column": "generated_at", 11 | }, 12 | table_type=elementary.get_default_table_type(), 13 | incremental_strategy=elementary.get_default_incremental_strategy() 14 | ) 15 | }} 16 | 17 | {{ elementary.get_dbt_invocations_empty_table_query() }} 18 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_metrics.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_metrics() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_metrics_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_models.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_models() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | 
table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_models_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_run_results.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | transient=False, 5 | unique_key = 'model_execution_id', 6 | on_schema_change = 'append_new_columns', 7 | indexes=[{'columns': ['unique_id']}] if target.type == "postgres" else [], 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | meta={ 10 | "dedup_by_column": "model_execution_id", 11 | "timestamp_column": "created_at", 12 | "prev_timestamp_column": "generated_at", 13 | }, 14 | table_type=elementary.get_default_table_type(), 15 | incremental_strategy=elementary.get_default_incremental_strategy() 16 | ) 17 | }} 18 | 19 | {{ elementary.get_dbt_run_results_empty_table_query() }} 20 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_seeds.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_seeds() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_seeds_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_snapshots.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | 
materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_snapshots() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {# NOTE(review): dbt_snapshots reuses the dbt_models empty-table schema — presumably intentional because snapshot nodes carry the same columns; confirm no dedicated get_dbt_snapshots_empty_table_query macro exists. #} {{ elementary.get_dbt_models_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_sources.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_sources() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_sources_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/dbt_artifacts/dbt_tests.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | transient=False, 5 | post_hook='{{ elementary.upload_dbt_tests() }}', 6 | unique_key='unique_id', 7 | on_schema_change='sync_all_columns', 8 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | table_type=elementary.get_default_table_type(), 10 | incremental_strategy=elementary.get_default_incremental_strategy() 11 | ) 12 | }} 13 | 14 | {{ elementary.get_dbt_tests_empty_table_query() }} 15 | -------------------------------------------------------------------------------- /models/edr/run_results/dbt_source_freshness_results.sql:
-------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'source_freshness_execution_id', 5 | on_schema_change = 'append_new_columns', 6 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 7 | meta={ 8 | "timestamp_column": "created_at", 9 | "prev_timestamp_column": "generated_at", 10 | }, 11 | table_type=elementary.get_default_table_type(), 12 | incremental_strategy=elementary.get_default_incremental_strategy() 13 | ) 14 | }} 15 | 16 | {{ elementary.empty_dbt_source_freshness_results() }} 17 | -------------------------------------------------------------------------------- /models/edr/run_results/elementary_test_results.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | unique_key = 'id', 5 | on_schema_change = 'append_new_columns', 6 | full_refresh=elementary.get_config_var('elementary_full_refresh'), 7 | meta={ 8 | "timestamp_column": "created_at", 9 | "prev_timestamp_column": "detected_at", 10 | }, 11 | table_type=elementary.get_default_table_type(), 12 | incremental_strategy=elementary.get_default_incremental_strategy() 13 | ) 14 | }} 15 | 16 | {{ elementary.empty_elementary_test_results() }} 17 | -------------------------------------------------------------------------------- /models/edr/run_results/job_run_results.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | {# the aggregate expressions are built as jinja strings so they can be reused in the select list below #} {% set job_run_started_at %} 9 | min({{ elementary.edr_cast_as_timestamp("run_started_at") }}) 10 | {% endset %} 11 | 12 | {% set job_run_completed_at %} 13 | max({{ elementary.edr_cast_as_timestamp("run_completed_at") }}) 14 | {% endset %} 15 | 16 | {# one row per job run: invocations grouped by job_name/job_id/job_run_id #} with jobs as ( 17 | select 18 | job_name, 19 | job_id, 20 | job_run_id, 21 | {{ job_run_started_at }} as job_run_started_at, 22 | {{ 
job_run_completed_at }} as job_run_completed_at, 23 | {{ elementary.timediff("second", job_run_started_at, job_run_completed_at) }} as job_run_execution_time 24 | from {{ ref('dbt_invocations') }} 25 | where job_id is not null 26 | group by job_name, job_id, job_run_id 27 | ) 28 | 29 | select 30 | job_name as name, 31 | job_id as id, 32 | job_run_id as run_id, 33 | job_run_started_at as run_started_at, 34 | job_run_completed_at as run_completed_at, 35 | job_run_execution_time as run_execution_time 36 | from jobs 37 | -------------------------------------------------------------------------------- /models/edr/run_results/seed_run_results.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with dbt_run_results as ( 9 | select * from {{ ref('dbt_run_results') }} 10 | ), 11 | 12 | dbt_seeds as ( 13 | select * from {{ ref('dbt_seeds') }} 14 | ) 15 | 16 | SELECT 17 | run_results.model_execution_id, 18 | run_results.unique_id, 19 | run_results.invocation_id, 20 | run_results.query_id, 21 | run_results.name, 22 | run_results.generated_at, 23 | run_results.status, 24 | run_results.full_refresh, 25 | run_results.message, 26 | run_results.execution_time, 27 | run_results.execute_started_at, 28 | run_results.execute_completed_at, 29 | run_results.compile_started_at, 30 | run_results.compile_completed_at, 31 | run_results.compiled_code, 32 | run_results.adapter_response, 33 | run_results.thread_id, 34 | run_results.group_name, 35 | seeds.database_name, 36 | seeds.schema_name, 37 | run_results.materialization, 38 | seeds.tags, 39 | seeds.package_name, 40 | seeds.path, 41 | seeds.original_path, 42 | seeds.owner, 43 | seeds.alias 44 | {# inner join: only run results whose unique_id matches a seed node are kept #} FROM dbt_run_results run_results 45 | JOIN dbt_seeds seeds ON run_results.unique_id = seeds.unique_id 46 | --------------------------------------------------------------------------------
/models/edr/run_results/snapshot_run_results.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with dbt_run_results as ( 9 | select * from {{ ref('dbt_run_results') }} 10 | ), 11 | 12 | dbt_snapshots as ( 13 | select * from {{ ref('dbt_snapshots') }} 14 | ) 15 | 16 | SELECT 17 | run_results.model_execution_id, 18 | run_results.unique_id, 19 | run_results.invocation_id, 20 | run_results.query_id, 21 | run_results.name, 22 | run_results.generated_at, 23 | run_results.status, 24 | run_results.full_refresh, 25 | run_results.message, 26 | run_results.execution_time, 27 | run_results.execute_started_at, 28 | run_results.execute_completed_at, 29 | run_results.compile_started_at, 30 | run_results.compile_completed_at, 31 | run_results.compiled_code, 32 | run_results.adapter_response, 33 | run_results.thread_id, 34 | run_results.group_name, 35 | snapshots.database_name, 36 | snapshots.schema_name, 37 | {# prefer the materialization recorded at run time, fall back to the snapshot node's #} coalesce(run_results.materialization, snapshots.materialization) as materialization, 38 | snapshots.tags, 39 | snapshots.package_name, 40 | snapshots.path, 41 | snapshots.original_path, 42 | snapshots.owner, 43 | snapshots.alias 44 | FROM dbt_run_results run_results 45 | JOIN dbt_snapshots snapshots ON run_results.unique_id = snapshots.unique_id 46 | -------------------------------------------------------------------------------- /models/edr/run_results/test_result_rows.sql: -------------------------------------------------------------------------------- 1 | -- indexes are not supported in all warehouses, relevant to postgres only 2 | {{ 3 | config( 4 | materialized = 'incremental', 5 | unique_key = 'elementary_test_results_id', 6 | on_schema_change = 'append_new_columns', 7 | indexes=[{'columns': ['created_at']}, {'columns': ['elementary_test_results_id']}] if target.type == "postgres" else [], 8 | 
full_refresh=elementary.get_config_var('elementary_full_refresh'), 9 | meta={ 10 | "timestamp_column": "created_at", 11 | "prev_timestamp_column": "detected_at", 12 | }, 13 | table_type=elementary.get_default_table_type(), 14 | incremental_strategy=elementary.get_default_incremental_strategy() 15 | ) 16 | }} 17 | {# the "-- depends_on" ref comment below forces dbt to treat elementary_test_results as an upstream dependency #} 18 | -- depends_on: {{ ref('elementary_test_results') }} 19 | {{ elementary.empty_table([ 20 | ('elementary_test_results_id', 'long_string'), 21 | ('result_row', 'long_string'), 22 | ('detected_at','timestamp'), 23 | ('created_at','timestamp'), 24 | ]) }} 25 | -------------------------------------------------------------------------------- /models/edr/system/metadata.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='table') }} {# one-row table recording the installed elementary package version #} 2 | 3 | SELECT 4 | '{{ elementary.get_elementary_package_version() }}' as dbt_pkg_version 5 | -------------------------------------------------------------------------------- /models/edr/system/monitors_runs.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | bind=False 5 | ) 6 | }} 7 | 8 | with data_monitoring_metrics as ( 9 | 10 | select * from {{ ref('data_monitoring_metrics') }} 11 | 12 | ), 13 | 14 | {# earliest/latest bucket_end per (table, column, metric, metric properties) — queried by tests to choose a metric collection start time #} max_bucket_end as ( 15 | 16 | select full_table_name, 17 | column_name, 18 | metric_name, 19 | metric_properties, 20 | max(bucket_end) as last_bucket_end, 21 | min(bucket_end) as first_bucket_end 22 | from data_monitoring_metrics 23 | group by 1,2,3,4 24 | 25 | ) 26 | 27 | select * from max_bucket_end -------------------------------------------------------------------------------- /models/elementary_tests.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: data_monitoring_metrics 5 | description: > 6 | Elementary anomaly detection tests monitor metrics such as volume, freshness and data quality metrics.
7 | This incremental table is used to store the metrics over time. 8 | On each anomaly detection test, the test queries this table for historical metrics, and compares them to the latest values. 9 | The table is updated with new metrics by the on-run-end hook named handle_test_results, which is executed at the end of dbt test invocations. 10 | 11 | - name: metrics_anomaly_score 12 | description: > 13 | This is a view on `data_monitoring_metrics` that runs the same query the anomaly detection tests run to calculate anomaly scores. 14 | The purpose of this view is to provide visibility into the results of anomaly detection tests. 15 | 16 | - name: anomaly_threshold_sensitivity 17 | description: > 18 | This is a view on `metrics_anomaly_score` that calculates whether metric values from the latest runs would have been considered anomalies under different anomaly score thresholds. 19 | This can help you decide if there is a need to adjust the `anomaly_score_threshold`. 20 | 21 | - name: monitors_runs 22 | description: > 23 | This is a view on `data_monitoring_metrics` that is used to determine when a specific anomaly detection test was last executed. 24 | Each anomaly detection test queries this view to decide on a start time for collecting metrics. 25 | 26 | - name: schema_columns_snapshot 27 | description: > 28 | Stores the schema details for tables that are monitored with the elementary schema changes test. 29 | In order to compare the current schema to its previous state, we must store the previous state. 30 | The data is from a view that queries the data warehouse information schema. 31 | This is an incremental table.
32 | -------------------------------------------------------------------------------- /packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: [">=0.8.0", "<2.0.0"] 4 | -------------------------------------------------------------------------------- /snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/snapshots/.gitkeep -------------------------------------------------------------------------------- /tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elementary-data/dbt-data-reliability/59270ed6a84ed6d4e9d449bc2725643771389cae/tests/.gitkeep --------------------------------------------------------------------------------