├── .changes
├── unreleased
│   └── .gitkeep
├── v0.2.0.md
├── v0.9.0.md
├── v0.1.0.md
├── v0.18.4.md
├── v0.23.0.md
├── v0.10.3.md
├── v0.3.0.md
├── v0.19.2.md
├── v0.4.0.md
├── v0.18.0.md
├── v0.17.2.md
├── v0.17.0.md
├── v0.19.0.md
├── v0.11.1.md
├── v0.18.2.md
├── v0.19.3.md
├── v0.7.0.md
├── v0.21.0.md
├── header.tpl.md
├── v0.10.0.md
├── v0.20.4.md
├── v0.5.3.md
├── v0.5.4.md
├── v0.16.0.md
├── v0.13.0.md
├── v0.7.3.md
├── v0.14.0.md
├── v0.5.2.md
├── v0.8.0.md
├── v0.5.1.md
├── v0.18.3.md
├── v0.7.2.md
├── v0.20.1.md
├── v0.6.0.md
├── v0.15.0.md
├── v0.7.1.md
├── v0.14.1.md
├── v0.14.2.md
├── v0.11.0.md
├── v0.19.1.md
├── v0.9.1.md
├── v0.10.1.md
├── v0.12.0.md
├── v0.14.3.md
├── v0.5.0.md
├── v0.22.0.md
├── v0.10.2.md
├── v0.18.1.md
├── v0.20.2.md
├── v0.17.1.md
├── v0.20.3.md
├── v0.23.1.md
└── v0.20.0.md
├── integration_tests
├── packages.yml
├── models
│   └── foo.sql
├── config.py
├── profiles.yml
├── tests
│   └── jobs_with_cost_metadata.sql
└── dbt_project.yml
├── scripts
└── git_tag.sh
├── packages.yml
├── CONTRIBUTING.md
├── models
├── monitoring
│   ├── base
│   │   ├── jobs_by_project_with_cost.sql
│   │   ├── dataset_options.sql
│   │   ├── bigquery_audit_logs.sql
│   │   ├── table_and_storage_with_cost.sql
│   │   └── gcp_billing_export_resource_v1.sql
│   ├── storage
│   │   ├── datamart
│   │   │   ├── table
│   │   │   │   ├── most_expensive_tables.sql
│   │   │   │   ├── table_with_potential_savings.sql
│   │   │   │   ├── read_heavy_tables.sql
│   │   │   │   ├── partitions_monitoring.sql
│   │   │   │   ├── table_with_potential_savings.yml
│   │   │   │   └── partitions_monitoring.yml
│   │   │   ├── unused_tables.sql
│   │   │   ├── billing
│   │   │   │   ├── storage_billing_per_hour.yml
│   │   │   │   └── storage_billing_per_hour.sql
│   │   │   └── dataset
│   │   │   │   └── dataset_with_potential_savings.sql
│   │   └── intermediate
│   │   │   ├── table_reference_incremental.yml
│   │   │   ├── table_reference_incremental.sql
│   │   │   ├── stg_partitions_monitoring.yml
│   │   │   └── stg_partitions_monitoring.sql
│   ├── compute
│   │   ├── datamart
│   │   │   ├── time
│   │   │   │   ├── compute_cost_per_hour_view.sql
│   │   │   │   ├── compute_cost_per_minute_view.sql
│   │   │   │   ├── compute_cost_per_hour_view.yml
│   │   │   │   └── compute_cost_per_minute_view.yml
│   │   │   ├── job
│   │   │   │   ├── most_expensive_jobs.sql
│   │   │   │   ├── slowest_jobs.sql
│   │   │   │   ├── query_with_better_pricing_using_on_demand_view.sql
│   │   │   │   ├── query_with_better_pricing_using_flat_pricing_view.sql
│   │   │   │   ├── most_repeated_jobs.sql
│   │   │   │   ├── slowest_jobs.yml
│   │   │   │   ├── most_expensive_jobs.yml
│   │   │   │   ├── most_repeated_jobs.yml
│   │   │   │   ├── job_failure_analysis.sql
│   │   │   │   └── job_failure_analysis.yml
│   │   │   ├── user
│   │   │   │   ├── most_expensive_users.sql
│   │   │   │   └── most_expensive_users.yml
│   │   │   ├── dbt
│   │   │   │   ├── most_repeated_models.yml
│   │   │   │   ├── most_expensive_models.sql
│   │   │   │   ├── most_repeated_models.sql
│   │   │   │   └── most_expensive_models.yml
│   │   │   └── bi_engine
│   │   │   │   └── bi_engine_materialized_view_analysis.yml
│   │   └── intermediate
│   │   │   ├── billing
│   │   │   ├── compute_billing_per_hour.yml
│   │   │   └── compute_billing_per_hour.sql
│   │   │   ├── cost
│   │   │   ├── compute_cost_per_minute.sql
│   │   │   ├── compute_cost_per_hour.sql
│   │   │   ├── compute_cost_per_hour.yml
│   │   │   └── compute_cost_per_minute.yml
│   │   │   ├── user
│   │   │   └── users_costs_incremental.sql
│   │   │   └── dbt
│   │   │   └── models_costs_incremental.sql
│   └── global
│   │   └── datamart
│   │   ├── dbt_bigquery_monitoring_options.yml
│   │   ├── daily_spend.yml
│   │   └── dbt_bigquery_monitoring_options.sql
└── information_schema
│   ├── bi_engine
│   ├── information_schema_bi_capacities.sql
│   ├── information_schema_bi_capacity_changes.sql
│   ├── information_schema_bi_capacities.yml
│   └── information_schema_bi_capacity_changes.yml
│   ├── datasets
│   ├── information_schema_schemata_options.sql
│   ├── information_schema_schemata.sql
│   ├── information_schema_schemata_links.sql
│   ├── information_schema_shared_dataset_usage.sql
│   ├── information_schema_schemata_options.yml
│   ├── information_schema_schemata.yml
│   ├── information_schema_schemata_replicas.sql
│   ├── information_schema_schemata_links.yml
│   └── information_schema_schemata_replicas_by_failover_reservation.sql
│   ├── reservations
│   ├── information_schema_assignments.sql
│   ├── information_schema_capacity_commitments.sql
│   ├── information_schema_assignment_changes.sql
│   ├── information_schema_reservations_timeline.sql
│   ├── information_schema_reservations.sql
│   ├── information_schema_capacity_commitment_changes.sql
│   ├── information_schema_reservation_changes.sql
│   ├── information_schema_assignments.yml
│   └── information_schema_capacity_commitments.yml
│   ├── streaming
│   ├── information_schema_streaming_timeline.sql
│   ├── information_schema_streaming_timeline_by_folder.sql
│   ├── information_schema_streaming_timeline_by_organization.sql
│   ├── information_schema_streaming_timeline.yml
│   ├── information_schema_streaming_timeline_by_folder.yml
│   └── information_schema_streaming_timeline_by_organization.yml
│   ├── write_api
│   ├── information_schema_write_api_timeline.sql
│   ├── information_schema_write_api_timeline_by_folder.sql
│   ├── information_schema_write_api_timeline_by_organization.sql
│   ├── information_schema_write_api_timeline.yml
│   ├── information_schema_write_api_timeline_by_folder.yml
│   └── information_schema_write_api_timeline_by_organization.yml
│   ├── configuration
│   ├── information_schema_project_options.yml
│   ├── information_schema_organization_options.yml
│   ├── information_schema_project_options.sql
│   ├── information_schema_project_options_changes.sql
│   ├── information_schema_organization_options.sql
│   ├── information_schema_effective_project_options.sql
│   ├── information_schema_organization_options_changes.sql
│   ├── information_schema_effective_project_options.yml
│   ├── information_schema_project_options_changes.yml
│   └── information_schema_organization_options_changes.yml
│   ├── recommendations_and_insights
│   ├── information_schema_insights.sql
│   ├── information_schema_recommendations.sql
│   └── information_schema_recommendations_by_organization.sql
│   ├── access_control
│   ├── information_schema_object_privileges.sql
│   └── information_schema_object_privileges.yml
│   ├── tables
│   ├── information_schema_table_snapshots.sql
│   ├── information_schema_table_storage.sql
│   ├── information_schema_table_options.yml
│   ├── information_schema_table_options.sql
│   ├── information_schema_table_snapshots.yml
│   ├── information_schema_column_field_paths.sql
│   ├── information_schema_constraint_column_usage.yml
│   ├── information_schema_columns.sql
│   ├── information_schema_table_storage_usage_timeline.sql
│   ├── information_schema_tables.sql
│   ├── information_schema_table_constraints.sql
│   ├── information_schema_table_constraints.yml
│   ├── information_schema_key_column_usage.yml
│   ├── information_schema_table_storage_usage_timeline_by_folder.sql
│   ├── information_schema_table_storage_usage_timeline_by_organization.sql
│   ├── information_schema_table_storage_by_folder.sql
│   ├── information_schema_constraint_column_usage.sql
│   └── information_schema_table_storage_by_organization.sql
│   ├── routines
│   ├── information_schema_routine_options.yml
│   ├── information_schema_routine_options.sql
│   ├── information_schema_parameters.sql
│   ├── information_schema_routines.sql
│   └── information_schema_parameters.yml
│   ├── views
│   ├── information_schema_views.sql
│   ├── information_schema_views.yml
│   ├── information_schema_materialized_views.yml
│   └── information_schema_materialized_views.sql
│   ├── search_indexes
│   ├── information_schema_search_index_columns.yml
│   ├── information_schema_search_index_options.yml
│   ├── information_schema_search_index_column_options.yml
│   └── information_schema_search_indexes_by_organization.sql
│   ├── vector_indexes
│   ├── information_schema_vector_index_columns.yml
│   ├── information_schema_vector_index_options.yml
│   ├── information_schema_vector_index_columns.sql
│   ├── information_schema_vector_index_options.sql
│   └── information_schema_vector_indexes.sql
│   ├── sessions
│   ├── information_schema_sessions_by_user.sql
│   ├── information_schema_sessions_by_project.sql
│   ├── information_schema_sessions_by_user.yml
│   └── information_schema_sessions_by_project.yml
│   ├── jobs_timeline
│   ├── information_schema_jobs_timeline.sql
│   ├── information_schema_jobs_timeline_by_user.sql
│   ├── information_schema_jobs_timeline_by_folder.sql
│   └── information_schema_jobs_timeline_by_organization.sql
│   └── jobs
│   ├── information_schema_jobs.sql
│   ├── information_schema_jobs_by_project.sql
│   ├── information_schema_jobs_by_user.sql
│   └── information_schema_jobs_by_folder.sql
├── macros
├── should_combine_audit_logs_and_information_schema.sql
├── top_sum_udf.sql
├── sharded_tables_merger.sql
├── currency_to_symbol.sql
├── materalization_information_schema.sql
├── materialization_view_if_explicit_projects.sql
└── materialization_project_by_project_view.sql
├── .editorconfig
├── config.py
├── Makefile
├── docs
├── intro.md
├── installation.md
└── configuration
│   ├── gcp-billing.md
│   ├── audit-logs.md
│   └── audit-logs-vs-information-schema.md
├── .gitignore
├── .sqlfluff
├── .github
└── workflows
│   ├── release.yml
│   ├── deploy_docs_test.yml
│   └── build_base_image.yml
├── dbt_project.yml
└── LICENSE
/.changes/unreleased/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/integration_tests/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - local: ../
--------------------------------------------------------------------------------
/scripts/git_tag.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | git tag -a "$1" -m "$1"
4 | git push --tags
--------------------------------------------------------------------------------
/integration_tests/models/foo.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized='incremental') }}
2 | 
3 | select 1 as foo
--------------------------------------------------------------------------------
/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - package: dbt-labs/dbt_utils
3 |     version: [">=0.8.0", "<2.0.0"]
4 |   # - package: dbt-labs/codegen
5 |   #   version: 0.12.1
6 | 
7 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to dbt-bigquery-monitoring
2 | 
3 | See related documentation page:
4 | https://bqbooster.github.io/dbt-bigquery-monitoring/contributing
5 | 
--------------------------------------------------------------------------------
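The `packages.yml` above declares this package's own dependency on `dbt_utils`. For orientation, a downstream dbt project would pull in dbt-bigquery-monitoring the same way through its own `packages.yml`; a minimal sketch (the hub package name and version range are assumptions, check the dbt Hub listing for the current release):

```yaml
packages:
  - package: bqbooster/dbt_bigquery_monitoring
    version: [">=0.23.0", "<0.24.0"]
```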
/models/monitoring/base/jobs_by_project_with_cost.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized=materialized_as_view_if_explicit_projects()
4 |   )
5 | }}
6 | SELECT *
7 | FROM {{ ref('jobs_with_cost') }}
--------------------------------------------------------------------------------
/.changes/v0.2.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.2.0 - October 17, 2023
2 | 
3 | ### Breaking Changes
4 | 
5 | - fix storage_with_cost.yml ([#0](https://github.com/Kayrnt/dbt-bigquery-monitoring/issues/0))
--------------------------------------------------------------------------------
/.changes/v0.9.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.9.0 - October 06, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add GCP billing export support
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.1.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.1.0 - October 08, 2023
2 | 
3 | ### Breaking Changes
4 | 
5 | - Fix the incremental tables not working as intended ([#1](https://github.com/Kayrnt/dbt-bigquery-monitoring/issues/1))
--------------------------------------------------------------------------------
/.changes/v0.18.4.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.18.4 - February 21, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix audit logs upper bound for jobs models
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.23.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.23.0 - September 20, 2025
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add a macro to debug configuration
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.10.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.10.3 - October 27, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - SAFE_CAST option_value to avoid potential cast errors
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/macros/should_combine_audit_logs_and_information_schema.sql:
--------------------------------------------------------------------------------
1 | {% macro should_combine_audit_logs_and_information_schema() -%}
2 |   {{ return(dbt_bigquery_monitoring_variable_should_combine_audit_logs_and_information_schema()) }}
3 | {%- endmacro %}
--------------------------------------------------------------------------------
/.changes/v0.3.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.3.0 - February 18, 2024
2 | 
3 | ### Breaking Changes
4 | 
5 | - Uniformize fields to count queries to query_count and improve the dbt docs ([#0](https://github.com/Kayrnt/dbt-bigquery-monitoring/issues/0))
--------------------------------------------------------------------------------
/.changes/v0.19.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.19.2 - March 27, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix target_relation regression in project_by_project_table materialization
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.4.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.4.0 - May 18, 2024
2 | 
3 | ### Features
4 | 
5 | - Add ephemeral models for all Google metadata tables (along generation script in /documentation_parser)
6 | - Add the "region mode" to avoid listing explicitly GCP projects
7 | 
--------------------------------------------------------------------------------
/.changes/v0.18.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.18.0 - January 13, 2025
2 | 
3 | ### Features
4 | 
5 | 
6 | - Improve the scalability of the project approach by factoring them into a table
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.17.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.17.2 - January 06, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix DBT_BQ_MONITORING_GCP_PROJECTS being required while it should not for region mode
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 | 
3 | [*]
4 | indent_style = space
5 | indent_size = 2
6 | end_of_line = lf
7 | charset = utf-8
8 | trim_trailing_whitespace = true
9 | insert_final_newline = true
10 | 
11 | [*.yml]
12 | indent_size = 2
13 | trim_trailing_whitespace = false
--------------------------------------------------------------------------------
/macros/top_sum_udf.sql:
--------------------------------------------------------------------------------
1 | {% macro top_sum(array) %}
2 |   (SELECT APPROX_TOP_SUM(c.value, c.sum, 100) FROM UNNEST({{ array }}) c)
3 | {% endmacro %}
4 | 
5 | {% macro top_sum_from_count(array) %}
6 |   (SELECT APPROX_TOP_SUM(c.value, c.count, 100) FROM UNNEST({{ array }}) c)
7 | {% endmacro %}
--------------------------------------------------------------------------------
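The `top_sum` macros above wrap BigQuery's `APPROX_TOP_SUM` to keep the 100 heaviest entries of an `ARRAY<STRUCT>` column, weighted by a `sum` or `count` field. A hypothetical call site (the `queries_by_cost` column is illustrative, not part of the package):

```sql
SELECT
  hour,
  -- queries_by_cost: ARRAY<STRUCT<value STRING, sum FLOAT64>> (assumed shape)
  {{ top_sum('queries_by_cost') }} AS top_queries_by_cost
FROM {{ ref('jobs_costs_incremental') }}
```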
/.changes/v0.17.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.17.0 - December 13, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Allow to combine audit logs and information schema jobs table to have the best of both worlds
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.19.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.19.0 - March 25, 2025
2 | 
3 | ### Features
4 | 
5 | 
6 | - Update the information schema models and fixes partitioning / clustering detection on those base tables
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/models/monitoring/storage/datamart/table/most_expensive_tables.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT *
7 | FROM {{ ref('storage_with_cost') }}
8 | ORDER BY cost_monthly_forecast DESC
9 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }}
--------------------------------------------------------------------------------
/.changes/v0.11.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.11.1 - November 08, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix lookback for compute billing and make it customizable via `lookback_incremental_billing_window_days` var
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.18.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.18.2 - February 11, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Move up the dbt-bigquery-monitoring tag so it also wraps information schema for the project_by_project_table materialization
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.19.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.19.3 - March 28, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix an incremental case where we would still insert more than 4000 partitions per statement on existing tables with full refresh
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.7.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.7.0 - September 01, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - new partitions monitoring model
7 | 
8 | ### Fixes
9 | 
10 | 
11 | - Fix dataset based google models such as partitions
12 | 
13 | ### Contributors
14 | - [@Kayrnt](https://github.com/Kayrnt)
15 | 
16 | 
--------------------------------------------------------------------------------
/.changes/v0.21.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.21.0 - July 06, 2025
2 | 
3 | ### Features
4 | 
5 | 
6 | - New variable system that ensures that variable priority is well respected to improve configuration consistency and fix options output
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # This file is used to load the sqlmesh configuration
2 | from pathlib import Path
3 | 
4 | from sqlmesh.core.config import DuckDBConnectionConfig
5 | from sqlmesh.dbt.loader import sqlmesh_config
6 | 
7 | config = sqlmesh_config(
8 |     Path(__file__).parent,
9 |     state_connection=DuckDBConnectionConfig(),
10 | )
--------------------------------------------------------------------------------
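`config.py` above exposes the dbt project to SQLMesh through its dbt loader, keeping SQLMesh state in DuckDB. Assuming SQLMesh is installed in the project environment, it would typically be driven from the repository root; a sketch:

```bash
uv run sqlmesh plan
```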
/Makefile:
--------------------------------------------------------------------------------
1 | # Define the shell to use
2 | SHELL := /bin/bash
3 | 
4 | # Define the name of the Docker image
5 | IMAGE_NAME := dbt-bigquery-monitoring-base
6 | 
7 | test:
8 | 	uv run pytest
9 | 
10 | lint:
11 | 	uv run sqlfluff lint
12 | 
13 | fix:
14 | 	uv run sqlfluff fix
15 | 
16 | build:
17 | 	docker build -t $(IMAGE_NAME):main .
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/time/compute_cost_per_hour_view.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | 
7 | SELECT
8 |   TIMESTAMP_TRUNC(HOUR, YEAR) AS year,
9 |   TIMESTAMP_TRUNC(HOUR, MONTH) AS month,
10 |   TIMESTAMP_TRUNC(HOUR, DAY) AS day,
11 |   *
12 | FROM {{ ref('compute_cost_per_hour') }}
--------------------------------------------------------------------------------
/.changes/header.tpl.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | All notable changes to this project will be documented in this file.
3 | 
4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5 | adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html),
6 | and is generated by [Changie](https://github.com/miniscruff/changie).
--------------------------------------------------------------------------------
/.changes/v0.10.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.10.0 - October 15, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add partition_expiration_days to partition & move to view the partitions_monitoring model as it is just adding computed fields on stg_partitions_monitoring
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt)
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.20.4.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.20.4 - July 01, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix regressions regarding reservations fields (now builtin parser)
7 | 
8 | - Fix reading the right tables in 'dataset' based tables (e.g. search indexes ones)
9 | 
10 | ### Contributors
11 | - [@Kayrnt](https://github.com/Kayrnt)
12 | 
13 | 
--------------------------------------------------------------------------------
/.changes/v0.5.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.5.3 - August 12, 2024
2 | 
3 | ### Docs
4 | 
5 | - Add missing YML for billing models ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | 
7 | ### Contributors
8 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
9 | 
10 | 
--------------------------------------------------------------------------------
/.changes/v0.5.4.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.5.4 - August 19, 2024
2 | 
3 | ### Features
4 | 
5 | - Add a model with properties ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | 
7 | ### Contributors
8 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
9 | 
10 | 
--------------------------------------------------------------------------------
/integration_tests/config.py:
--------------------------------------------------------------------------------
1 | # This file is used to load the sqlmesh configuration
2 | from pathlib import Path
3 | 
4 | from sqlmesh.core.config import DuckDBConnectionConfig
5 | from sqlmesh.dbt.loader import sqlmesh_config
6 | 
7 | config = sqlmesh_config(
8 |     Path(__file__).parent,
9 |     state_connection=DuckDBConnectionConfig(),
10 | )
--------------------------------------------------------------------------------
/macros/sharded_tables_merger.sql:
--------------------------------------------------------------------------------
1 | {#- macro replace the suffix from sharded table to merge as a single table for cost monitoring -#}
2 | {% macro sharded_table_merger(table_name_field) -%}
3 |   REGEXP_REPLACE(
4 |     REGEXP_REPLACE(
5 |       {{ table_name_field }},
6 |       r"(\d{8,10})$", ""),
7 |     r"(20\d\dQ[1-4])$", "")
8 | {%- endmacro %}
--------------------------------------------------------------------------------
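The two regexes in `sharded_table_merger` above strip a trailing 8 to 10 digit date shard and a trailing `20NNQ[1-4]` quarter shard, so date- and quarter-sharded tables roll up to one logical table. A hypothetical call site (the column choice is illustrative):

```sql
-- e.g. 'events_20240131' and 'events_20240201' both become 'events_',
-- and 'sessions_2023Q4' becomes 'sessions_'
SELECT {{ sharded_table_merger('table_id') }} AS merged_table_id
FROM {{ ref('information_schema_tables') }}
```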
/.changes/v0.16.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.16.0 - December 09, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Support for audit logs ([#61](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/61))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#61](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/61))
10 | 
11 | 
--------------------------------------------------------------------------------
/docs/intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | slug: /
4 | ---
5 | 
6 | # dbt-bigquery-monitoring
7 | 
8 | dbt-bigquery-monitoring is a dbt package that provides models for monitoring BigQuery performance and costs.
9 | 
10 | ## dbt compatibility
11 | 
12 | The package is actively used with the latest dbt stable version, which is `1.9.1` at the time of writing.
13 | 
--------------------------------------------------------------------------------
/.changes/v0.13.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.13.0 - November 17, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add BI engine usage monitoring ([#58](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/58))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#58](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/58))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.7.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.7.3 - September 30, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - rename macro to workaround a <1.6 bug ([#26](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/26))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#26](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/26))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.14.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.14.0 - November 18, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - New compute optimization oriented views ([#59](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/59))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#59](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/59))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.5.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.5.2 - August 01, 2024
2 | 
3 | ### Fixes
4 | 
5 | - Fix information_schema_schemata_links model information ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | 
7 | ### Contributors
8 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
9 | 
10 | 
--------------------------------------------------------------------------------
/.changes/v0.8.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.8.0 - October 02, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - partition expiration is now configurable
7 | 
8 | - Leverage copy_partitions by default (but configurable as well)
9 | 
10 | ### Under the Hood
11 | 
12 | 
13 | - Add CI testing
14 | 
15 | ### Contributors
16 | - [@Kayrnt](https://github.com/Kayrnt)
17 | 
18 | 
--------------------------------------------------------------------------------
/.changes/v0.5.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.5.1 - July 30, 2024
2 | 
3 | ### Fixes
4 | 
5 | - Fix YML model name misaligned with information schema dbt models ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | 
7 | ### Contributors
8 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
9 | 
10 | 
--------------------------------------------------------------------------------
/.changes/v0.18.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.18.3 - February 20, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix query_with_better_pricing views formulas
7 | 
8 | - Fix job read when no data in existing table
9 | 
10 | ### Under the Hood
11 | 
12 | 
13 | - remove unused jobs_done_incremental_minute
14 | 
15 | ### Contributors
16 | - [@Kayrnt](https://github.com/Kayrnt)
17 | 
18 | 
--------------------------------------------------------------------------------
/.changes/v0.7.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.7.2 - September 24, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix the version defined in get_query_comment macro ([#19](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/19))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#19](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/19))
10 | 
11 | 
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/time/compute_cost_per_minute_view.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT
7 |   TIMESTAMP_TRUNC(MINUTE, YEAR) AS year,
8 |   TIMESTAMP_TRUNC(MINUTE, MONTH) AS month,
9 |   TIMESTAMP_TRUNC(MINUTE, DAY) AS day,
10 |   TIMESTAMP_TRUNC(MINUTE, HOUR) AS hour,
11 |   *
12 | FROM {{ ref('compute_cost_per_minute') }}
--------------------------------------------------------------------------------
/.changes/v0.20.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.20.1 - June 25, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Move type to config.meta.type to avoid linter warnings ([#124](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/124))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#124](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/124))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.6.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.6.0 - August 20, 2024
2 | 
3 | ### Breaking Changes
4 | 
5 | - Update google models, add new dataset with costs and clean up ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | 
7 | ### Contributors
8 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
9 | 
10 | 
--------------------------------------------------------------------------------
/.changes/v0.15.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.15.0 - December 03, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add client type support to breakdown by type of workload ([#68](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/68))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#68](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/68))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.7.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.7.1 - September 24, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix the name of the dbt_bigquery_monitoring_options model ([#17](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/17))
7 | 
8 | ### Contributors
9 | - [@gofford](https://github.com/gofford) ([#17](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/17))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.14.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.14.1 - November 19, 2024
2 | 
3 | ### Under the Hood
4 | 
5 | 
6 | - Update Google base models based on latest documentation parsing ([#71](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/71))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#71](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/71))
10 | 
11 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDE
2 | .idea/
3 | .vscode/
4 | .condarc
5 | # cache
6 | .cache
7 | # standard dbt ignore
8 | target/
9 | dbt_modules/
10 | dbt_packages/
11 | logs/
12 | /.user.yml
13 | package-lock.yml
14 | # ignore any __pycache__ directories
15 | __pycache__/
16 | # reports
17 | reports/
18 | # ci
19 | keyfile.json
20 | tmp_html_content.html
21 | # custom
22 | test_scripts/
23 | dbt_internal_packages/
--------------------------------------------------------------------------------
/.changes/v0.14.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.14.2 - November 24, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Rework models to use `project_id` prefix if possible and use `ephemeral` if relevant ([#74](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/74))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#74](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/74))
10 | 
11 | 
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/job/most_expensive_jobs.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT
7 |   hour,
8 |   query,
9 |   j.*
10 | FROM {{ ref('jobs_costs_incremental') }}, UNNEST(jobs) AS j
11 | WHERE j.rank_cost <= {{ dbt_bigquery_monitoring_variable_output_limit_size() }}
12 | ORDER BY query_cost DESC
13 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }}
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/job/slowest_jobs.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT
7 |   hour,
8 |   query,
9 |   j.*
10 | FROM {{ ref('jobs_costs_incremental') }}, UNNEST(jobs) AS j
11 | WHERE j.rank_duration <= {{ dbt_bigquery_monitoring_variable_output_limit_size() }}
12 | ORDER BY total_time_seconds DESC
13 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }}
--------------------------------------------------------------------------------
/models/monitoring/base/dataset_options.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized=materialized_as_view_if_explicit_projects()
4 |   )
5 | }}
6 | SELECT
7 |   catalog_name AS project_id,
8 |   schema_name AS dataset_id,
9 |   COALESCE(ANY_VALUE(IF(option_name = "storage_billing_model", option_value, NULL)), "LOGICAL") AS storage_billing_model
10 | FROM {{ ref('information_schema_schemata_options') }}
11 | GROUP BY project_id, dataset_id
12 | 
--------------------------------------------------------------------------------
/.changes/v0.11.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.11.0 - November 04, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add storage costs to daily_spend when using GCP cost export
7 | 
8 | - Add compute_cost_per_minute for precise compute cost tracking
9 | 
10 | ### Fixes
11 | 
12 | 
13 | - Add equality on incremental models to avoid cases where hours could be skipped
14 | 
15 | ### Contributors
16 | - [@Kayrnt](https://github.com/Kayrnt)
17 | 
18 | 
--------------------------------------------------------------------------------
/.sqlfluff:
--------------------------------------------------------------------------------
1 | [sqlfluff]
2 | exclude_rules = ST07, AM04, CV03, LT05, ST06, RF04, AM06, ST05, LT02, CP02, LT07, LT14, RF01
3 | dialect = bigquery
4 | templater = dbt
5 | max_line_length = 120
6 | 
7 | [sqlfluff:indentation]
8 | tab_space_size = 2
9 | indent_unit = space
10 | 
11 | [sqlfluff:rules:capitalisation.keywords]
12 | capitalisation_policy = upper
13 | 
14 | [sqlfluff:rules:capitalisation.functions]
15 | extended_capitalisation_policy = upper
16 | 
--------------------------------------------------------------------------------
/.changes/v0.19.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.19.1 - March 26, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Support hard refreshing tables when the partitioning / clustering changes
7 | 
8 | - Ensure we have proper aligned columns to insert the data in project by project table
9 | 
10 | - Ensure that the incremental case won't try to insert more than 4000 partitions per statement
11 | 
12 | ### Contributors
13 | - [@Kayrnt](https://github.com/Kayrnt)
14 | 
15 | 
--------------------------------------------------------------------------------
/models/information_schema/bi_engine/information_schema_bi_capacities.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-bi-capacities -#}
3 | 
4 | SELECT
5 |   project_id,
6 |   project_number,
7 |   bi_capacity_name,
8 |   size,
9 |   preferred_tables
10 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`BI_CAPACITIES`
--------------------------------------------------------------------------------
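The `INFORMATION_SCHEMA` models such as `information_schema_bi_capacities` above resolve their source region through `dbt_bigquery_monitoring_variable_bq_region()`. In a consuming project that value would normally be supplied as a dbt variable; a sketch, assuming the variable is exposed as `bq_region` (check the package documentation for the exact name):

```yaml
# dbt_project.yml of the consuming project (variable name assumed)
vars:
  bq_region: us
```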
/.changes/v0.9.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.9.1 - October 09, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fixes queries related datamart that were storing running/pending jobs thus returning partial numbers
7 | 
8 | - Fix is_incremental_run that wasn't working and replaced with a macro to inline proper incremental behavior
9 | 
10 | - Bugs on macros copy_partition / gcp billing export
11 | 
12 | ### Contributors
13 | - [@Kayrnt](https://github.com/Kayrnt)
14 | 
15 | 
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/job/query_with_better_pricing_using_on_demand_view.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT
7 |   *,
8 |   flat_pricing_query_cost - ondemand_query_cost AS cost_savings,
9 |   1 - SAFE_DIVIDE(ondemand_query_cost, flat_pricing_query_cost) AS cost_savings_pct
10 | FROM
11 |   {{ ref('jobs_with_cost') }}
12 | WHERE
13 |   flat_pricing_query_cost > ondemand_query_cost
14 |   AND ondemand_query_cost > 0
--------------------------------------------------------------------------------
/models/monitoring/global/datamart/dbt_bigquery_monitoring_options.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | models:
4 |   - name: dbt_bigquery_monitoring_options
5 |     description: This model contains current configuration options and values for dbt BigQuery monitoring extension.
6 |     columns:
7 |       - name: option_label
8 |         description: The label of the configuration option.
9 |       - name: option_value
10 |         description: The value of the configuration option.
--------------------------------------------------------------------------------
/models/information_schema/datasets/information_schema_schemata_options.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-datasets-schemata-options -#}
3 | 
4 | SELECT
5 |   catalog_name,
6 |   schema_name,
7 |   option_name,
8 |   option_type,
9 |   option_value
10 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA_OPTIONS`
--------------------------------------------------------------------------------
/models/monitoring/compute/datamart/job/query_with_better_pricing_using_flat_pricing_view.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='view',
4 |   )
5 | }}
6 | SELECT
7 |   *,
8 |   ondemand_query_cost - flat_pricing_query_cost AS cost_savings,
9 |   1 - SAFE_DIVIDE(flat_pricing_query_cost, ondemand_query_cost) AS cost_savings_pct
10 | FROM
11 |   {{ ref('jobs_with_cost') }}
12 | WHERE
13 |   ondemand_query_cost > flat_pricing_query_cost
14 |   AND flat_pricing_query_cost > 0
--------------------------------------------------------------------------------
/.changes/v0.10.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.10.1 - October 19, 2024
2 | 
3 | ### Under the Hood
4 | 
5 | 
6 | - Migrate in another repo documentation parser to reduce the package size from useless files
7 | 
8 | - clean "No datasets found in the project list" log ([#35](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/35))
9 | 
10 | ### Contributors
11 | - [@Kayrnt](https://github.com/Kayrnt) ([#35](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/35))
12 | 
13 | 
--------------------------------------------------------------------------------
/.changes/v0.12.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.12.0 - November 16, 2024
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add reservation_usage_per_minute ([#57](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/57))
7 | 
8 | ### Fixes
9 | 
10 | 
11 | - Fix compute cost per minute that would override latest hour because it is leveraging insert_overwrite.
12 | 
13 | ### Contributors
14 | - [@Kayrnt](https://github.com/Kayrnt) ([#57](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/57))
15 | 
16 | 
--------------------------------------------------------------------------------
/models/monitoring/storage/datamart/unused_tables.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='table',
4 |   )
5 | }}
6 | WITH last_used_date AS (
7 |   SELECT
8 |     project_id,
9 |     dataset_id,
10 |     table_id,
11 |     MAX(day) AS last_used_date
12 |   FROM {{ ref('table_reference_incremental') }}
13 |   GROUP BY ALL
14 | )
15 | 
16 | SELECT
17 |   ts.*,
18 |   lud.last_used_date
19 | FROM {{ ref('storage_with_cost') }} AS ts
20 | LEFT JOIN last_used_date AS lud USING (project_id, dataset_id, table_id)
--------------------------------------------------------------------------------
/.changes/v0.14.3.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.14.3 - November 27, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix stg_partitions_monitoring that wasn't exhaustive because of a restriction on partition expiration based tables ([#79](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/79))
7 | 
8 | - Restrain stg_partitions_monitoring to partitioned tables
9 | 
10 | ### Contributors
11 | - [@Kayrnt](https://github.com/Kayrnt) ([#79](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/79))
12 | 
13 | 
--------------------------------------------------------------------------------
/.changes/v0.5.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.5.0 - June 05, 2024
2 | 
3 | ### Features
4 | 
5 | - Add new models for storage monitoring ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
6 | - Add SQLMesh support ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
7 | 
8 | ### Contributors
9 | - [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0), [#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
10 | 
11 | 
--------------------------------------------------------------------------------
/.changes/v0.22.0.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.22.0 - July 20, 2025
2 | 
3 | ### Features
4 | 
5 | 
6 | - Add missing new INFORMATION_SCHEMA models and add organization models tag
7 | 
8 | ### Fixes
9 | 
10 | 
11 | - Fix input_gcp_projects parsing and avoid fromjson for dbt_fusion compatibility
12 | 
13 | ### Under the Hood
14 | 
15 | 
16 | - Update dbt-bigquery to 1.10.0 in CI
17 | 
18 | - Disable by default organization models
19 | 
20 | ### Contributors
21 | - [@Kayrnt](https://github.com/Kayrnt)
22 | 
23 | 
--------------------------------------------------------------------------------
/models/information_schema/datasets/information_schema_schemata.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-datasets-schemata -#}
3 | 
4 | SELECT
5 |   catalog_name,
6 |   schema_name,
7 |   schema_owner,
8 |   creation_time,
9 |   last_modified_time,
10 |   location,
11 |   ddl,
12 |   default_collation_name
13 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA`
--------------------------------------------------------------------------------
/models/information_schema/bi_engine/information_schema_bi_capacity_changes.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-bi-capacity-changes -#}
3 | 
4 | SELECT
5 |   change_timestamp,
6 |   project_id,
7 |   project_number,
8 |   bi_capacity_name,
9 |   size,
10 |   user_email,
11 |   preferred_tables
12 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`BI_CAPACITY_CHANGES`
--------------------------------------------------------------------------------
/models/information_schema/reservations/information_schema_assignments.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-assignments -#}
3 | 
4 | SELECT
5 |   ddl,
6 |   project_id,
7 |   project_number,
8 |   assignment_id,
9 |   reservation_name,
10 |   job_type,
11 |   assignee_id,
12 |   assignee_number,
13 |   assignee_type
14 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ASSIGNMENTS`
--------------------------------------------------------------------------------
/models/information_schema/streaming/information_schema_streaming_timeline.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-streaming -#}
3 | 
4 | SELECT
5 |   start_timestamp,
6 |   project_id,
7 |   project_number,
8 |   dataset_id,
9 |   table_id,
10 |   error_code,
11 |   total_requests,
12 |   total_rows,
13 |   total_input_bytes
14 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`STREAMING_TIMELINE`
--------------------------------------------------------------------------------
/models/information_schema/write_api/information_schema_write_api_timeline.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-write-api -#}
3 | 
4 | SELECT
5 |   start_timestamp,
6 |   project_id,
7 |   project_number,
8 |   dataset_id,
9 |   table_id,
10 |   stream_type,
11 |   error_code,
12 |   total_requests,
13 |   total_rows,
14 |   total_input_bytes
15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`WRITE_API_TIMELINE`
--------------------------------------------------------------------------------
/macros/currency_to_symbol.sql:
--------------------------------------------------------------------------------
1 | {#-- macro to transform an ISO 4217 (currency) to currency symbol #}
2 | {% macro currency_to_symbol(currency_field) -%}
3 |   CASE {{ currency_field }}
4 |     WHEN 'USD' THEN '$'
5 |     WHEN 'EUR' THEN '€'
6 |     WHEN 'JPY' THEN '¥'
7 |     WHEN 'AUD' THEN 'A$'
8 |     WHEN 'BRL' THEN 'R$'
9 |     WHEN 'CAD' THEN 'C$'
10 |     WHEN 'HKD' THEN 'HK$'
11 |     WHEN 'INR' THEN '₹'
12 |     WHEN 'IDR' THEN 'Rp'
13 |     WHEN 'ILS' THEN '₪'
14 |     WHEN 'MXN' THEN 'Mex$'
15 |     WHEN 'NZD' THEN 'NZ$'
16 |     WHEN 'GBP' THEN '£'
17 |     ELSE {{ currency_field }}
18 |   END
19 | {%- endmacro %}
20 | 
--------------------------------------------------------------------------------
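A hypothetical call site for `currency_to_symbol` above; the `currency` column and the model it reads from are illustrative, not part of the package contract:

```sql
SELECT
  cost,
  -- falls back to the raw ISO 4217 code when no symbol is mapped
  {{ currency_to_symbol('currency') }} AS currency_symbol
FROM {{ ref('daily_spend') }}
```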
/models/information_schema/reservations/information_schema_capacity_commitments.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-capacity-commitments -#}
3 | 
4 | SELECT
5 |   ddl,
6 |   project_id,
7 |   project_number,
8 |   capacity_commitment_id,
9 |   commitment_plan,
10 |   state,
11 |   slot_count,
12 |   edition,
13 |   is_flat_rate,
14 |   renewal_plan
15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`CAPACITY_COMMITMENTS`
--------------------------------------------------------------------------------
/models/monitoring/base/bigquery_audit_logs.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized = "ephemeral",
4 |     enabled = dbt_bigquery_monitoring_variable_enable_gcp_bigquery_audit_logs()
5 |   )
6 | }}
7 | SELECT
8 |   logName,
9 |   resource,
10 |   protopayload_auditlog,
11 |   textPayload,
12 |   timestamp,
13 |   receiveTimestamp,
14 |   severity,
15 |   insertId,
16 |   httpRequest,
17 |   operation,
18 |   trace,
19 |   spanId,
20 |   traceSampled,
21 |   sourceLocation,
22 |   split,
23 |   labels,
24 |   errorGroups
25 | FROM {{ ref('bigquery_audit_logs_v2') }}
--------------------------------------------------------------------------------
/.changes/v0.10.2.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.10.2 - October 26, 2024
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix information_schema_partition failing when schema isn't in the same gcp project as execution project
7 | 
8 | - Add lookback for 3 days on gcp billing export usage ([#44](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/44))
9 | 
10 | ### Under the Hood
11 | 
12 | 
13 | - Clean up log in get_query_comment
14 | 
15 | ### Contributors
16 | - [@Kayrnt](https://github.com/Kayrnt) ([#44](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/44))
17 | 
18 | 
--------------------------------------------------------------------------------
/models/information_schema/datasets/information_schema_schemata_links.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-datasets-schemata-links -#}
3 | 
4 | SELECT
5 |   catalog_name,
6 |   schema_name,
7 |   linked_schema_catalog_number,
8 |   linked_schema_catalog_name,
9 |   linked_schema_name,
10 |   linked_schema_creation_time,
11 |   linked_schema_org_display_name
12 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA_LINKS`
--------------------------------------------------------------------------------
/.changes/v0.18.1.md:
--------------------------------------------------------------------------------
1 | ## dbt-bigquery-monitoring v0.18.1 - January 13, 2025
2 | 
3 | ### Fixes
4 | 
5 | 
6 | - Fix the full refresh mode for information_schema table that are not partitioned
7 | 
8 | - Fix the number of inserts for stats by using the right statement as main
9 | 
10 | ### Docs
11 | 
12 | 
13 | - Adjust the documentation to match the update of information schema materialization
14 | 
15 | ### Under the Hood
16 | 
17 | 
18 | - Rework the structure of models to tag only the monitoring models
19 | 
20 | ### Contributors
21 | - [@Kayrnt](https://github.com/Kayrnt)
22 | 
23 | 
--------------------------------------------------------------------------------
/models/information_schema/configuration/information_schema_project_options.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | models:
3 |   - name: information_schema_project_options
4 |     columns:
5 |       - name: OPTION_NAME
6 |         description: Option ID for the specified configuration setting.
7 |         data_type: STRING
8 |       - name: OPTION_DESCRIPTION
9 |         description: The option description.
10 |         data_type: STRING
11 |       - name: OPTION_TYPE
12 |         description: The data type of the OPTION_VALUE.
13 |         data_type: STRING
14 |       - name: OPTION_VALUE
15 |         description: The current value of the option.
16 |         data_type: STRING
--------------------------------------------------------------------------------
/models/information_schema/reservations/information_schema_assignment_changes.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-assignments-changes -#}
3 | 
4 | SELECT
5 |   change_timestamp,
6 |   project_id,
7 |   project_number,
8 |   assignment_id,
9 |   reservation_name,
10 |   job_type,
11 |   assignee_id,
12 |   assignee_number,
13 |   assignee_type,
14 |   action,
15 |   user_email,
16 |   state
17 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ASSIGNMENT_CHANGES`
--------------------------------------------------------------------------------
/models/information_schema/configuration/information_schema_organization_options.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | models:
3 |   - name: information_schema_organization_options
4 |     columns:
5 |       - name: OPTION_NAME
6 |         description: One of the name values in the options table.
7 |         data_type: STRING
8 |       - name: OPTION_DESCRIPTION
9 |         description: The option description.
10 |         data_type: STRING
11 |       - name: OPTION_TYPE
12 |         description: The data type of the OPTION_VALUE.
13 |         data_type: STRING
14 |       - name: OPTION_VALUE
15 |         description: The current value of the option.
16 |         data_type: STRING
17 | 
--------------------------------------------------------------------------------
16 | data_type: STRING 17 | -------------------------------------------------------------------------------- /.changes/v0.20.2.md: -------------------------------------------------------------------------------- 1 | ## dbt-bigquery-monitoring v0.20.2 - June 30, 2025 2 | 3 | ### Fixes 4 | 5 | 6 | - Fix data duplication with project mode using default bq monitoring materialization ([#129](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/129)) 7 | 8 | ### Under the Hood 9 | 10 | 11 | - Rework the meta to put it under the config block to avoid dbt warnings 12 | 13 | - Remove query from information_schema_jobs_by_organization as not accessible 14 | 15 | ### Contributors 16 | - [@Kayrnt](https://github.com/Kayrnt) ([#129](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/129)) 17 | 18 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/table/table_with_potential_savings.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | SELECT 7 | project_id, 8 | dataset_id, 9 | table_id, 10 | total_logical_tb, 11 | total_physical_tb, 12 | logical_cost_monthly_forecast, 13 | physical_cost_monthly_forecast, 14 | optimal_storage_billing_model, 15 | potential_savings 16 | FROM {{ ref('storage_with_cost') }} 17 | WHERE 18 | table_type = 'BASE TABLE' 19 | AND potential_savings > 0 20 | ORDER BY storage_pricing_model_difference DESC 21 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 22 | -------------------------------------------------------------------------------- /models/information_schema/recommendations_and_insights/information_schema_insights.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-insights -#} 3 | 4 | SELECT 5 | insight_id, 6 | insight_type, 7 | subtype, 8 | project_id, 9 | project_number, 10 | description, 11 | last_updated_time, 12 | category, 13 | target_resources, 14 | state, 15 | severity, 16 | associated_recommendation_ids, 17 | additional_details 18 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`INSIGHTS` 19 | -------------------------------------------------------------------------------- /models/monitoring/global/datamart/daily_spend.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: daily_spend 5 | description: > 6 | A model that aggregates daily BigQuery costs. 7 | config: 8 | meta: 9 | label: "Daily Spend" 10 | order_fields_by: "label" 11 | group_label: "Global cost" 12 | columns: 13 | - name: day 14 | description: The day of the aggregated cost. 15 | - name: cost_category 16 | description: The category of the cost (e.g., "compute"). 17 | - name: cost 18 | description: The total cost for the day in the specified category. 
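# Example usage (a hypothetical consumer query, not part of the package). It assumes the
# model is built into a dataset named `dbt_bigquery_monitoring` in `my-gcp-project` and
# that `day` is a TIMESTAMP:
#
#   SELECT day, cost_category, SUM(cost) AS cost
#   FROM `my-gcp-project.dbt_bigquery_monitoring.daily_spend`
#   WHERE day >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
#   GROUP BY day, cost_category
#   ORDER BY day DESC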
19 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_reservations_timeline.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-reservation-timeline -#} 3 | 4 | SELECT 5 | autoscale, 6 | edition, 7 | ignore_idle_slots, 8 | labels, 9 | period_start, 10 | project_id, 11 | project_number, 12 | reservation_id, 13 | reservation_name, 14 | slots_assigned, 15 | slots_max_assigned, 16 | max_slots, 17 | scaling_mode 18 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`RESERVATIONS_TIMELINE` 19 | -------------------------------------------------------------------------------- /models/information_schema/streaming/information_schema_streaming_timeline_by_folder.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-streaming-by-folder -#} 3 | 4 | SELECT 5 | start_timestamp, 6 | project_id, 7 | project_number, 8 | dataset_id, 9 | table_id, 10 | error_code, 11 | total_requests, 12 | total_rows, 13 | total_input_bytes 14 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`STREAMING_TIMELINE_BY_FOLDER` 15 | -------------------------------------------------------------------------------- /models/information_schema/recommendations_and_insights/information_schema_recommendations.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-recommendations -#} 3 | 4 | SELECT 5 | recommendation_id, 6 | recommender, 7 | subtype, 8 | project_id, 9 | project_number, 10 | description, 11 | last_updated_time, 12 | target_resources, 13 | state, 14 | primary_impact, 15 | priority, 16 | associated_insight_ids, 17 | additional_details 18 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`RECOMMENDATIONS` 19 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_reservations.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-reservations -#} 3 | 4 | SELECT 5 | ddl, 6 | project_id, 7 | project_number, 8 | reservation_name, 9 | ignore_idle_slots, 10 | slot_capacity, 11 | target_job_concurrency, 12 | autoscale, 13 | edition, 14 | primary_location, 15 | secondary_location, 16 | original_primary_location, 17 | labels, 18 | max_slots, 19 | scaling_mode 20 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`RESERVATIONS` 21 | -------------------------------------------------------------------------------- /models/information_schema/write_api/information_schema_write_api_timeline_by_folder.sql: 
-------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-write-api-by-folder -#} 3 | 4 | SELECT 5 | start_timestamp, 6 | project_id, 7 | project_number, 8 | dataset_id, 9 | table_id, 10 | stream_type, 11 | error_code, 12 | total_requests, 13 | total_rows, 14 | total_input_bytes 15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`WRITE_API_TIMELINE_BY_FOLDER` 16 | -------------------------------------------------------------------------------- /.changes/v0.17.1.md: -------------------------------------------------------------------------------- 1 | ## dbt-bigquery-monitoring v0.17.1 - January 05, 2025 2 | 3 | ### Fixes 4 | 5 | 6 | - Fix bi engine statistics structure not consistent between audit logs & information schema 7 | 8 | - Fix materialized_as_view_if_explicit_projects macro as not working as intended in region mode 9 | 10 | - Protect get_query_comment against potential undefined 11 | 12 | ### Docs 13 | 14 | 15 | - Improve the documentation regarding combining audit logs with information schema jobs data 16 | 17 | ### Under the Hood 18 | 19 | 20 | - Small changes to fix compatibility with SQLMesh 21 | 22 | ### Contributors 23 | - [@Kayrnt](https://github.com/Kayrnt) 24 | 25 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: [ CHANGELOG.md ] 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v4 14 | 15 | - name: Get the latest version 16 | id: latest 17 | uses: miniscruff/changie-action@v2 18 | with: 19 | version: latest 20 | args: latest 21 | 22 | - name: Release 23 | uses: softprops/action-gh-release@v2 24 | with: 25 | body_path: ".changes/${{ steps.latest.outputs.output }}.md" 26 | tag_name: "${{ steps.latest.outputs.output }}" 27 | -------------------------------------------------------------------------------- /models/information_schema/streaming/information_schema_streaming_timeline_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-streaming-by-organization -#} 3 | 4 | SELECT 5 | start_timestamp, 6 | project_id, 7 | project_number, 8 | dataset_id, 9 | table_id, 10 | error_code, 11 | total_requests, 12 | total_rows, 13 | total_input_bytes 14 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`STREAMING_TIMELINE_BY_ORGANIZATION` 15 | -------------------------------------------------------------------------------- /integration_tests/profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_bigquery_monitoring: 2 | target: ci 3 | outputs: 4 | ci: 5 | type: bigquery 6 | method: service-account 7 | project: "{{ env_var('DBT_ENV_SECRET_BIGQUERY_TEST_STORAGE_PROJECT') }}" 8 | execution_project: "{{ 
env_var('DBT_ENV_SECRET_BIGQUERY_TEST_EXECUTION_PROJECT') }}" 9 | dataset: dbt_bigquery_monitoring_test_commit_{{ env_var('GITHUB_SHA_OVERRIDE', '') if env_var('GITHUB_SHA_OVERRIDE', '') else env_var('GITHUB_SHA') }} 10 | threads: 8 11 | keyfile: ./keyfile.json 12 | timeout_seconds: 300 13 | location: "{{ env_var('DBT_ENV_SECRET_BIGQUERY_TEST_LOCATION') }}" 14 | retries: 0 15 | -------------------------------------------------------------------------------- /.changes/v0.20.3.md: -------------------------------------------------------------------------------- 1 | ## dbt-bigquery-monitoring v0.20.3 - July 01, 2025 2 | 3 | ### Fixes 4 | 5 | 6 | - Use dataset level for index tables as required by the documentation ([#128](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/128)) 7 | 8 | ### Under the Hood 9 | 10 | 11 | - Disable 'by folder' models by default as they are often problematic for users who don't have folders ([#128](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/128)) 12 | 13 | ### Contributors 14 | - [@Kayrnt](https://github.com/Kayrnt) ([#128](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/128), [#128](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/128)) 15 | 16 | -------------------------------------------------------------------------------- /models/information_schema/write_api/information_schema_write_api_timeline_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-write-api-by-organization -#} 3 | 4 | SELECT 5 | start_timestamp, 6 | project_id, 7 | project_number, 8 | dataset_id, 9 | table_id, 10 | stream_type, 11 | error_code, 12 | total_requests, 13 | total_rows, 14 | total_input_bytes 15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`WRITE_API_TIMELINE_BY_ORGANIZATION` 16 | -------------------------------------------------------------------------------- /macros/materalization_information_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro dbt_bigquery_monitoring_materialization() %} 2 | {% set projects = project_list() %} 3 | {#- If the user has set a materialization in the config that differs from the default -#} 4 | {% if dbt_bigquery_monitoring_variable_google_information_schema_model_materialization() != 'placeholder' %} 5 | {% set materialization = dbt_bigquery_monitoring_variable_google_information_schema_model_materialization() %} 6 | {% elif projects|length == 0 %} 7 | {% set materialization = 'ephemeral' %} 8 | {% else %} 9 | {% set materialization = 'project_by_project_table' %} 10 | {% endif %} 11 | {{ return(materialization) }} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_capacity_commitment_changes.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-capacity-commitment-changes -#} 3 | 4 | SELECT 5 | change_timestamp, 6 | project_id, 7 | project_number, 8 | capacity_commitment_id, 9 | 
commitment_plan, 10 | state, 11 | slot_count, 12 | action, 13 | user_email, 14 | commitment_start_time, 15 | commitment_end_time, 16 | failure_status, 17 | renewal_plan, 18 | edition, 19 | is_flat_rate 20 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`CAPACITY_COMMITMENT_CHANGES` 21 | -------------------------------------------------------------------------------- /.changes/v0.23.1.md: -------------------------------------------------------------------------------- 1 | ## dbt-bigquery-monitoring v0.23.1 - October 01, 2025 2 | 3 | ### Fixes 4 | 5 | 6 | - Handle trailing dbt metadata comment ([#150](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/150)) 7 | 8 | - Add new model for BigQuery audit logs and update references in existing model ([#144](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/144)) 9 | 10 | ### Under the Hood 11 | 12 | 13 | - Flatten models that were using jobs_by_project_with_cost instead of jobs_with_cost 14 | 15 | ### Contributors 16 | - [@Kayrnt](https://github.com/Kayrnt) ([#150](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/150), [#144](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/144)) 17 | 18 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_reservation_changes.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-reservation-changes -#} 3 | 4 | SELECT 5 | change_timestamp, 6 | project_id, 7 | project_number, 8 | reservation_name, 9 | ignore_idle_slots, 10 | action, 11 | slot_capacity, 12 | user_email, 13 | target_job_concurrency, 14 | autoscale, 15 | edition, 16 | primary_location, 17 | secondary_location, 18 | original_primary_location, 19 | labels, 20 | max_slots, 21 | scaling_mode 22 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`RESERVATION_CHANGES` 23 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 4 3 | slug: /installation 4 | --- 5 | 6 | # Installation 7 | 8 | ## Installing the package to your dbt project 9 | 10 | Add the following to your `packages.yml` file: 11 | 12 | ```yml 13 | packages: 14 | - package: bqbooster/dbt_bigquery_monitoring 15 | version: 0.23.1 16 | ``` 17 | 18 | ## Set up an output dataset 19 | 20 | In your dbt_project.yml file, add the following configuration: 21 | 22 | ```yml 23 | models: 24 | ## dbt-bigquery-monitoring models will be created in the schema '<target_schema>_dbt_bigquery_monitoring' (or a related schema if you override the schema generation macro) 25 | dbt_bigquery_monitoring: 26 | +schema: "dbt_bigquery_monitoring" 27 | ``` 28 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/table/read_heavy_tables.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | 7 | WITH table_reference AS ( 8 | SELECT 9 | project_id, 10 | dataset_id, 11 | table_id, 12 | SUM(reference_count) AS reference_count 13 | FROM {{ ref('table_reference_incremental') }} 14 | WHERE day > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), 
INTERVAL {{ dbt_bigquery_monitoring_variable_lookback_window_days() }} DAY) 15 | GROUP BY ALL 16 | ) 17 | 18 | SELECT 19 | ts.*, 20 | trc.reference_count 21 | FROM {{ ref('storage_with_cost') }} AS ts 22 | INNER JOIN table_reference AS trc USING (project_id, dataset_id, table_id) 23 | ORDER BY trc.reference_count DESC 24 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 25 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/user/most_expensive_users.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | SELECT 7 | TIMESTAMP_TRUNC(HOUR, DAY) AS day, 8 | user_email, 9 | ROUND(SUM(total_query_cost) / NULLIF(SUM(query_count), 0), 4) AS avg_query_cost, 10 | ROUND(SUM(total_query_cost), 2) AS total_query_cost, 11 | SUM(total_slot_ms) AS total_slot_ms, 12 | SUM(query_count) AS query_count, 13 | ROUND(SUM(total_slot_ms) / NULLIF(SUM(query_count), 0) / 1000, 2) AS avg_slot_seconds_per_query, 14 | SUM(cache_hit) / NULLIF(SUM(query_count), 0) AS cache_hit_ratio 15 | FROM {{ ref('users_costs_incremental') }} 16 | GROUP BY day, user_email 17 | ORDER BY total_query_cost DESC 18 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 19 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/most_repeated_jobs.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | SELECT 7 | query, 8 | APPROX_TOP_SUM(j.project_id, 1, 100) AS project_ids, 9 | APPROX_TOP_SUM(j.reservation_id, 1, 100) AS reservation_ids, 10 | APPROX_TOP_SUM(j.user_email, 1, 100) AS user_emails, 11 | SUM(t.cache_hit) / SUM(t.query_count) AS cache_hit_ratio, 12 | SUM(ROUND(total_query_cost, 2)) AS total_query_cost, 13 | SUM(t.total_slot_ms) AS total_slot_ms, 14 | SUM(t.query_count) AS query_count, 15 | SUM(t.cache_hit) AS cache_hit, 16 | FROM {{ ref('jobs_costs_incremental') }} AS t, UNNEST(jobs) AS j 17 | GROUP BY query 18 | HAVING query_count > 1 19 | ORDER BY query_count DESC 20 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 21 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_project_options.sql: -------------------------------------------------------------------------------- 1 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-project-options -#} 2 | {# Required role/permissions: To get configuration options metadata, you need the following Identity and Access Management (IAM) permissions: 3 | bigquery.config.get 4 | The following predefined IAM role includes the 5 | permissions that you need in order to get project options metadata: 6 | roles/bigquery.jobUser 7 | For more information about granular BigQuery permissions, see 8 | roles and permissions. 
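As a quick smoke test that your principal holds this permission, you can run a minimal
query against the source view directly (a hypothetical example with a hardcoded region;
substitute your own region qualifier):
SELECT option_name, option_value
FROM `region-us`.`INFORMATION_SCHEMA`.`PROJECT_OPTIONS`
LIMIT 10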
-#} 9 | 10 | SELECT 11 | option_name, 12 | option_description, 13 | option_type, 14 | option_value 15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`PROJECT_OPTIONS` 16 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_project_options_changes.sql: -------------------------------------------------------------------------------- 1 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-project-options-changes -#} 2 | {# Required role/permissions: To get the configuration, you need the bigquery.config.update 3 | Identity and Access Management (IAM) permission at the project level. The predefined 4 | IAM role roles/bigquery.admin includes the permissions that you 5 | need to create a configuration. For more information about granular BigQuery permissions, see 6 | roles and permissions. -#} 7 | 8 | SELECT 9 | update_time, 10 | username, 11 | updated_options, 12 | project_id, 13 | project_number 14 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`PROJECT_OPTIONS_CHANGES` 15 | -------------------------------------------------------------------------------- /models/information_schema/access_control/information_schema_object_privileges.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='ephemeral') }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-object-privileges -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.OBJECT_PRIVILEGES view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.datasets.get for datasets. 6 | bigquery.tables.getIamPolicy for tables and views. 7 | For more information about BigQuery permissions, see 8 | Access control with IAM. -#} 9 | 10 | SELECT 11 | object_catalog, 12 | object_schema, 13 | object_name, 14 | object_type, 15 | privilege_type, 16 | grantee 17 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`OBJECT_PRIVILEGES` 18 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_organization_options.sql: -------------------------------------------------------------------------------- 1 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-organization-options -#} 2 | {# Required role/permissions: To get organization options metadata, you need the following Identity and Access Management (IAM) permissions: 3 | bigquery.config.get 4 | The following predefined IAM role includes the 5 | permissions that you need in order to get organization options metadata: 6 | roles/bigquery.jobUser 7 | For more information about granular BigQuery permissions, see 8 | roles and permissions. 
-#} 9 | 10 | SELECT 11 | option_name, 12 | option_description, 13 | option_type, 14 | option_value 15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ORGANIZATION_OPTIONS` 16 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "dbt_bigquery_monitoring" 2 | version: "0.23.1" 3 | require-dbt-version: [">=1.3.0", "<2.0.0"] 4 | config-version: 2 5 | 6 | profile: dbt_bigquery_monitoring 7 | 8 | model-paths: ["models"] 9 | 10 | clean-targets: 11 | - target 12 | - dbt_packages 13 | 14 | models: 15 | +start: Jan 1 2017 16 | dbt_bigquery_monitoring: 17 | +tags: 18 | - "dbt-bigquery-monitoring" 19 | information_schema: 20 | +tags: 21 | - "dbt-bigquery-monitoring-information-schema" 22 | monitoring: 23 | +on_schema_change: "append_new_columns" 24 | +tags: 25 | - "dbt-bigquery-monitoring-datamarts" 26 | compute: 27 | +tags: 28 | - "dbt-bigquery-monitoring-compute" 29 | storage: 30 | +tags: 31 | - "dbt-bigquery-monitoring-storage" 32 | -------------------------------------------------------------------------------- /models/monitoring/base/table_and_storage_with_cost.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized=materialized_as_view_if_explicit_projects() 4 | ) 5 | }} 6 | WITH 7 | information_schema_tables AS ( 8 | SELECT 9 | t.table_catalog AS project_id, 10 | t.table_schema AS dataset_id, 11 | t.table_name AS table_id, 12 | t.default_collation_name, 13 | t.is_insertable_into, 14 | t.is_typed, 15 | t.ddl 16 | FROM {{ ref('information_schema_tables') }} AS t 17 | ) 18 | 19 | SELECT 20 | t.project_id, 21 | t.dataset_id, 22 | t.table_id, 23 | t.default_collation_name, 24 | t.is_insertable_into, 25 | t.is_typed, 26 | t.ddl, 27 | s.* EXCEPT (project_id, dataset_id, table_id) 28 | FROM information_schema_tables AS t 29 | INNER JOIN {{ ref('storage_with_cost') }} AS s USING (project_id, dataset_id, table_id) 30 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_effective_project_options.sql: -------------------------------------------------------------------------------- 1 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-effective-project-options -#} 2 | {# Required role/permissions: To get effective project options metadata, you need the bigquery.config.get 3 | Identity and Access Management (IAM) permission. The following predefined IAM role includes the 4 | permissions that you need in order to get effective project options metadata: 5 | roles/bigquery.jobUser 6 | For more information about granular BigQuery permissions, see 7 | roles and permissions. 
-#} 8 | 9 | SELECT 10 | option_name, 11 | option_description, 12 | option_type, 13 | option_set_level, 14 | option_set_on_id 15 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`EFFECTIVE_PROJECT_OPTIONS` 16 | -------------------------------------------------------------------------------- /models/monitoring/base/gcp_billing_export_resource_v1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = "ephemeral", 4 | enabled = dbt_bigquery_monitoring_variable_enable_gcp_billing_export() 5 | ) 6 | }} 7 | SELECT 8 | billing_account_id, 9 | invoice, 10 | cost_type, 11 | service, 12 | sku, 13 | usage_start_time, 14 | usage_end_time, 15 | project, 16 | labels, 17 | system_labels, 18 | location, 19 | cost, 20 | currency, 21 | currency_conversion_rate, 22 | usage, 23 | credits, 24 | adjustment_info, 25 | export_time, 26 | tags, 27 | cost_at_list, 28 | transaction_type, 29 | seller_name 30 | FROM 31 | `{{ dbt_bigquery_monitoring_variable_gcp_billing_export_storage_project() }}.{{ dbt_bigquery_monitoring_variable_gcp_billing_export_dataset() }}.{{ dbt_bigquery_monitoring_variable_gcp_billing_export_table() }}` 32 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/billing/storage_billing_per_hour.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: storage_billing_per_hour 5 | description: > 6 | This model calculates the hourly storage cost for BigQuery storage SKUs from the GCP billing export data. 7 | config: 8 | meta: 9 | label: "Storage cost per hour" 10 | order_fields_by: "label" 11 | group_label: "Storage cost" 12 | columns: 13 | - name: hour 14 | description: The hour when the usage occurred, truncated to the hour. 15 | - name: storage_type 16 | description: The type of storage as described by the SKU. 17 | - name: storage_cost 18 | description: The total storage cost for the given hour and storage type. 19 | - name: currency_symbol 20 | description: The symbol of the currency used for the cost. 21 | -------------------------------------------------------------------------------- /models/monitoring/compute/intermediate/billing/compute_billing_per_hour.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: compute_billing_per_hour 5 | description: > 6 | This model calculates the hourly compute cost for BigQuery compute SKUs from the GCP billing export data. 7 | config: 8 | meta: 9 | label: "compute cost per hour" 10 | order_fields_by: "label" 11 | group_label: "compute cost" 12 | columns: 13 | - name: hour 14 | description: The hour when the usage occurred, truncated to the hour. 15 | - name: compute_type 16 | description: The type of compute as described by the SKU. 17 | - name: compute_cost 18 | description: The total compute cost for the given hour and compute type. 19 | - name: currency_symbol 20 | description: The symbol of the currency used for the cost. 
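# Example usage (a hypothetical consumer query, not part of the package), assuming the
# model lands in a dataset named `dbt_bigquery_monitoring` in `my-gcp-project` and that
# `hour` is a TIMESTAMP:
#
#   SELECT compute_type, SUM(compute_cost) AS total_cost
#   FROM `my-gcp-project.dbt_bigquery_monitoring.compute_billing_per_hour`
#   WHERE hour >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
#   GROUP BY compute_type
#   ORDER BY total_cost DESC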
21 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_snapshots.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-snapshots -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_SNAPSHOTS view, you need the 4 | bigquery.tables.list Identity and Access Management (IAM) permission for the dataset. 5 | The roles/bigquery.metadataViewer predefined role includes the required 6 | permission. For more information about BigQuery permissions, see 7 | Access control with IAM. -#} 8 | 9 | SELECT 10 | table_catalog, 11 | table_schema, 12 | table_name, 13 | base_table_catalog, 14 | base_table_schema, 15 | base_table_name, 16 | snapshot_time 17 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_SNAPSHOTS` 18 | -------------------------------------------------------------------------------- /models/monitoring/storage/intermediate/table_reference_incremental.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: table_reference_incremental 5 | description: This table stores the reference count of each table in actual BigQuery queries. 6 | columns: 7 | - name: day 8 | data_type: timestamp 9 | description: day of the creation_time 10 | 11 | - name: project_id 12 | data_type: string 13 | description: GCP project id from the referenced table 14 | 15 | - name: dataset_id 16 | data_type: string 17 | description: dataset id from the referenced table 18 | 19 | - name: table_id 20 | data_type: string 21 | description: reference table 22 | 23 | - name: reference_count 24 | data_type: int64 25 | description: number of references to the table for the given day 26 | 27 | -------------------------------------------------------------------------------- /docs/configuration/gcp-billing.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5.3 3 | slug: /configuration/gcp-billing 4 | --- 5 | 6 | # GCP Billing export 7 | 8 | GCP Billing export is a feature that allows you to export your billing data to BigQuery. It allows the package to track the real cost of your queries and storage over time. 9 | 10 | To enable it on the GCP side, you can follow the [official documentation](https://cloud.google.com/billing/docs/how-to/export-data-bigquery) to set up the export. 
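Before wiring the package to the export, you can sanity-check that the export table is receiving data. The query below is a minimal sketch: the project, dataset, and table names are placeholders to replace with your own export location, and it only relies on the export columns `usage_start_time` and `cost`:

```sql
SELECT
  TIMESTAMP_TRUNC(usage_start_time, DAY) AS day,
  SUM(cost) AS cost
FROM `my-gcp-project.my_dataset.my_table`
WHERE usage_start_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
GROUP BY day
ORDER BY day DESC;
```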
11 | 12 | Then, to enable GCP billing export monitoring in the package, define the following settings in the `dbt_project.yml` file: 13 | 14 | ```yml 15 | vars: 16 | dbt_bigquery_monitoring_variable_enable_gcp_billing_export: true 17 | gcp_billing_export_storage_project: 'my-gcp-project' 18 | gcp_billing_export_dataset: 'my_dataset' 19 | gcp_billing_export_table: 'my_table' 20 | ``` 21 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_shared_dataset_usage.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'job_start_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-shared-dataset-usage -#} 3 | 4 | SELECT 5 | project_id, 6 | dataset_id, 7 | table_id, 8 | data_exchange_id, 9 | listing_id, 10 | job_start_time, 11 | job_end_time, 12 | job_id, 13 | job_project_number, 14 | job_location, 15 | linked_project_number, 16 | linked_dataset_id, 17 | subscriber_org_number, 18 | subscriber_org_display_name, 19 | job_principal_subject, 20 | num_rows_processed, 21 | total_bytes_processed 22 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SHARED_DATASET_USAGE` 23 | -------------------------------------------------------------------------------- /macros/materialization_view_if_explicit_projects.sql: -------------------------------------------------------------------------------- 1 | {#- Check whether projects are passed explicitly (project mode) to materialize as a view using project references. 2 | It avoids the following BQ error: 3 | Within a standard SQL view, references to tables/views require explicit project IDs 4 | unless the entity is created in the same project that is issuing the query, 5 | but these references are not project-qualified: "region-XXX.INFORMATION_SCHEMA.XXX" 6 | -#} 7 | {% macro materialized_as_view_if_explicit_projects() -%} 8 | {%- set google_information_schema_model_materialization = dbt_bigquery_monitoring_materialization() %} 9 | {% if project_list() | length > 0 and google_information_schema_model_materialization != 'project_by_project_table' %} 10 | {{ return('project_by_project_view') }} 11 | {% else %} 12 | {{ return('view') }} 13 | {% endif %} 14 | {%- endmacro %} 15 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_organization_options_changes.sql: -------------------------------------------------------------------------------- 1 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-organization-options-changes -#} 2 | {# Required role/permissions: To get the permission that you need to get the configuration changes, 3 | ask your administrator to grant you the BigQuery Admin (roles/bigquery.admin) 4 | IAM role on your organization. 5 | For more information about granting roles, see Manage access to projects, folders, and organizations. 
21 | 22 | -#} 23 | 24 | SELECT 25 | update_time, 26 | username, 27 | updated_options, 28 | project_id, 29 | project_number 30 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ORGANIZATION_OPTIONS_CHANGES` 31 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/table/partitions_monitoring.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | SELECT 7 | *, 8 | CASE partition_type 9 | WHEN 'YEAR' THEN PARSE_TIMESTAMP('%Y', earliest_partition_id) 10 | WHEN 'MONTH' THEN PARSE_TIMESTAMP('%Y%m', earliest_partition_id) 11 | WHEN 'DAY' THEN PARSE_TIMESTAMP('%Y%m%d', earliest_partition_id) 12 | WHEN 'HOUR' THEN PARSE_TIMESTAMP('%Y%m%d%H', earliest_partition_id) 13 | END AS earliest_partition_time, 14 | CASE partition_type 15 | WHEN 'YEAR' THEN PARSE_TIMESTAMP('%Y', latest_partition_id) 16 | WHEN 'MONTH' THEN PARSE_TIMESTAMP('%Y%m', latest_partition_id) 17 | WHEN 'DAY' THEN PARSE_TIMESTAMP('%Y%m%d', latest_partition_id) 18 | WHEN 'HOUR' THEN PARSE_TIMESTAMP('%Y%m%d%H', latest_partition_id) 19 | END AS latest_partition_time 20 | FROM {{ ref('stg_partitions_monitoring') }} 21 | -------------------------------------------------------------------------------- /integration_tests/tests/jobs_with_cost_metadata.sql: -------------------------------------------------------------------------------- 1 | -- Validate that the jobs_with_cost metadata extraction handles comments at both the start and end of a query. 2 | WITH sample_queries AS ( 3 | SELECT 'leading_comment' AS scenario, 4 | '/* {"dbt_version": "1.9.2", "node_id": "model.package.example"} */ SELECT 1' AS query 5 | UNION ALL 6 | SELECT 'trailing_comment_with_whitespace' AS scenario, 7 | 'SELECT 1 /* {"dbt_version": "1.9.2", "node_id": "model.package.example"} */ ' AS query 8 | ) 9 | SELECT * 10 | FROM ( 11 | SELECT 12 | scenario, 13 | COALESCE( 14 | REPLACE(REPLACE(REGEXP_EXTRACT(query, r'^(\/\* \{+?[\w\W]+?\} \*\/)'), '/', ''), '*', ''), 15 | REPLACE(REPLACE(REGEXP_EXTRACT(query, r'(\/\* \{+?[\w\W]+?\} \*\/)\s*$'), '/', ''), '*', '') 16 | ) AS dbt_info 17 | FROM sample_queries 18 | ) 19 | WHERE dbt_info IS NULL 20 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage -#} 3 | 4 | SELECT 5 | project_id, 6 | project_number, 7 | table_catalog, 8 | table_schema, 9 | table_name, 10 | creation_time, 11 | total_rows, 12 | total_partitions, 13 | total_logical_bytes, 14 | active_logical_bytes, 15 | long_term_logical_bytes, 16 | current_physical_bytes, 17 | total_physical_bytes, 18 | active_physical_bytes, 19 | long_term_physical_bytes, 20 | time_travel_physical_bytes, 21 | storage_last_modified_time, 22 | deleted, 23 | table_type, 24 | fail_safe_physical_bytes, 25 | last_metadata_index_refresh_time, 26 | table_deletion_reason, 27 | table_deletion_time 28 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE` 29 | -------------------------------------------------------------------------------- 
/models/information_schema/configuration/information_schema_effective_project_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_effective_project_options 4 | columns: 5 | - name: OPTION_NAME 6 | description: Option ID for the specified configuration setting. 7 | data_type: STRING 8 | - name: OPTION_DESCRIPTION 9 | description: The option description. 10 | data_type: STRING 11 | - name: OPTION_TYPE 12 | description: The data type of the OPTION_VALUE. 13 | data_type: STRING 14 | - name: OPTION_SET_LEVEL 15 | description: The level in the hierarchy at which the setting is defined, with 16 | possible values of DEFAULT, ORGANIZATION, or PROJECTS. 17 | data_type: STRING 18 | - name: OPTION_SET_ON_ID 19 | description: "Set value based on value of OPTION_SET_LEVEL:\n \nIf DEFAULT,\ 20 | \ set to null.\nIf ORGANIZATION, set to \"\".\nIf PROJECT, set to ID." 21 | data_type: STRING 22 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_table_options 4 | columns: 5 | - name: TABLE_CATALOG 6 | description: The project ID of the project that contains the dataset 7 | data_type: STRING 8 | - name: TABLE_SCHEMA 9 | description: "The name of the dataset that contains the table or view also referred\n\ 10 | \ to as the datasetId" 11 | data_type: STRING 12 | - name: TABLE_NAME 13 | description: The name of the table or view also referred to as the tableId 14 | data_type: STRING 15 | - name: OPTION_NAME 16 | description: One of the name values in the options table 17 | data_type: STRING 18 | - name: OPTION_TYPE 19 | description: One of the data type values in the options table 20 | data_type: STRING 21 | - name: OPTION_VALUE 22 | description: One of the value options in the options table 23 | data_type: STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/routines/information_schema_routine_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_routine_options 4 | columns: 5 | - name: SPECIFIC_CATALOG 6 | description: "The name of the project that contains the routine where the\n \ 7 | \ option is defined" 8 | data_type: STRING 9 | - name: SPECIFIC_SCHEMA 10 | description: "The name of the dataset that contains the routine where the option\n\ 11 | \ is defined" 12 | data_type: STRING 13 | - name: SPECIFIC_NAME 14 | description: The name of the routine 15 | data_type: STRING 16 | - name: OPTION_NAME 17 | description: One of the name values in the options table 18 | data_type: STRING 19 | - name: OPTION_TYPE 20 | description: One of the data type values in the options table 21 | data_type: STRING 22 | - name: OPTION_VALUE 23 | description: One of the value options in the options table 24 | data_type: STRING 25 | -------------------------------------------------------------------------------- /models/information_schema/views/information_schema_views.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-views -#} 3 | 
{# Required role/permissions: To get view metadata, you need the following Identity and Access Management (IAM) 4 | permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the 8 | permissions that you need in order to get view metadata: 9 | roles/bigquery.admin 10 | roles/bigquery.dataEditor 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.dataViewer 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. -#} 15 | 16 | SELECT 17 | table_catalog, 18 | table_schema, 19 | table_name, 20 | view_definition, 21 | check_option, 22 | use_standard_sql 23 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`VIEWS` 24 | -------------------------------------------------------------------------------- /models/information_schema/search_indexes/information_schema_search_index_columns.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_search_index_columns 4 | columns: 5 | - name: index_catalog 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: index_schema 9 | description: The name of the dataset that contains the index. 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the base table that the index is created on. 13 | data_type: STRING 14 | - name: index_name 15 | description: The name of the index. 16 | data_type: STRING 17 | - name: index_column_name 18 | description: The name of the top-level indexed column. 19 | data_type: STRING 20 | - name: index_field_path 21 | description: "The full path of the expanded indexed field, starting with the column\n\ 22 | \ name. Fields are separated by a period." 23 | data_type: STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/vector_indexes/information_schema_vector_index_columns.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_vector_index_columns 4 | columns: 5 | - name: index_catalog 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: index_schema 9 | description: The name of the dataset that contains the vector index. 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the table that the vector index is created on. 13 | data_type: STRING 14 | - name: index_name 15 | description: The name of the vector index. 16 | data_type: STRING 17 | - name: index_column_name 18 | description: The name of the indexed column. 19 | data_type: STRING 20 | - name: index_field_path 21 | description: "The full path of the expanded indexed field, starting with the column\n\ 22 | \ name. Fields are separated by a period." 
23 | data_type: STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/views/information_schema_views.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_views 4 | columns: 5 | - name: TABLE_CATALOG 6 | description: The name of the project that contains the dataset 7 | data_type: STRING 8 | - name: TABLE_SCHEMA 9 | description: "The name of the dataset that contains the view also referred to\ 10 | \ as the\n dataset id" 11 | data_type: STRING 12 | - name: TABLE_NAME 13 | description: The name of the view also referred to as the table id 14 | data_type: STRING 15 | - name: VIEW_DEFINITION 16 | description: The SQL query that defines the view 17 | data_type: STRING 18 | - name: CHECK_OPTION 19 | description: The value returned is always NULL 20 | data_type: STRING 21 | - name: USE_STANDARD_SQL 22 | description: "YES if the view was created by using a\n GoogleSQL query;\ 23 | \ NO if useLegacySql\n is set to true" 24 | data_type: STRING 25 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_options.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-options -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_OPTIONS view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | bigquery.routines.get 8 | bigquery.routines.list 9 | Each of the following predefined IAM roles includes the preceding 10 | permissions: 11 | roles/bigquery.admin 12 | roles/bigquery.dataViewer 13 | roles/bigquery.metadataViewer 14 | For more information about BigQuery permissions, see 15 | Access control with IAM. -#} 16 | 17 | SELECT 18 | table_catalog, 19 | table_schema, 20 | table_name, 21 | option_name, 22 | option_type, 23 | option_value 24 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_OPTIONS` 25 | -------------------------------------------------------------------------------- /models/information_schema/routines/information_schema_routine_options.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-routine-options -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.ROUTINE_OPTIONS view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.routines.get 6 | bigquery.routines.list 7 | Each of the following predefined IAM roles includes the 8 | permissions that you need in order to get routine metadata: 9 | roles/bigquery.admin 10 | roles/bigquery.metadataViewer 11 | roles/bigquery.dataViewer 12 | For more information about BigQuery permissions, see 13 | Access control with IAM. 
-#} 14 | 15 | SELECT 16 | specific_catalog, 17 | specific_schema, 18 | specific_name, 19 | option_name, 20 | option_type, 21 | option_value 22 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ROUTINE_OPTIONS` 23 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_snapshots.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_table_snapshots 4 | columns: 5 | - name: table_catalog 6 | description: The name of the project that contains the table snapshot 7 | data_type: STRING 8 | - name: table_schema 9 | description: The name of the dataset that contains the table snapshot 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the table snapshot 13 | data_type: STRING 14 | - name: base_table_catalog 15 | description: The name of the project that contains the base table 16 | data_type: STRING 17 | - name: base_table_schema 18 | description: The name of the dataset that contains the base table 19 | data_type: STRING 20 | - name: base_table_name 21 | description: The name of the base table 22 | data_type: STRING 23 | - name: snapshot_time 24 | description: The time that the table snapshot was created 25 | data_type: TIMESTAMP 26 | -------------------------------------------------------------------------------- /models/monitoring/storage/intermediate/table_reference_incremental.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | cluster_by = ["project_id", "dataset_id", "table_id"], 6 | partition_by = { 7 | "field": "day", 8 | "data_type": "timestamp", 9 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 10 | }, 11 | partition_expiration_days = dbt_bigquery_monitoring_variable_lookback_window_days() 12 | ) 13 | }} 14 | SELECT 15 | TIMESTAMP_TRUNC(creation_time, DAY) AS day, 16 | rt.project_id, 17 | rt.dataset_id, 18 | rt.table_id, 19 | COUNT(*) AS reference_count 20 | FROM {{ ref('jobs_with_cost') }}, UNNEST(referenced_tables) AS rt 21 | {% if is_incremental() %} 22 | WHERE creation_time > _dbt_max_partition 23 | {% else %} 24 | WHERE creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL {{ dbt_bigquery_monitoring_variable_lookback_window_days() }} DAY) 25 | {% endif %} 26 | GROUP BY ALL 27 | -------------------------------------------------------------------------------- /models/information_schema/access_control/information_schema_object_privileges.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_object_privileges 4 | columns: 5 | - name: OBJECT_CATALOG 6 | description: The project ID of the project that contains the resource. 7 | data_type: STRING 8 | - name: OBJECT_SCHEMA 9 | description: "The name of the dataset that contains the resource. This is\n \ 10 | \ NULL if the resource itself is a dataset." 11 | data_type: STRING 12 | - name: OBJECT_NAME 13 | description: The name of the table, view, or dataset the policy applies to. 14 | data_type: STRING 15 | - name: OBJECT_TYPE 16 | description: The resource type, such as SCHEMA (dataset), TABLE, VIEW, and EXTERNAL. 17 | data_type: STRING 18 | - name: PRIVILEGE_TYPE 19 | description: The role ID, such as roles/bigquery.dataEditor. 
20 | data_type: STRING 21 | - name: GRANTEE 22 | description: The user type and user that the role is granted to. 23 | data_type: STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/sessions/information_schema_sessions_by_user.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-sessions-by-user -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.SESSIONS_BY_USER view, you need 4 | the bigquery.jobs.list Identity and Access Management (IAM) permission for the project. 5 | Each of the following predefined IAM roles includes the 6 | required permission: 7 | Project Viewer 8 | BigQuery User 9 | For more information about BigQuery permissions, see 10 | Access control with IAM. -#} 11 | 12 | SELECT 13 | creation_time, 14 | expiration_time, 15 | is_active, 16 | last_modified_time, 17 | project_id, 18 | project_number, 19 | session_id, 20 | user_email 21 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SESSIONS_BY_USER` 22 | -------------------------------------------------------------------------------- /models/information_schema/vector_indexes/information_schema_vector_index_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_vector_index_options 4 | columns: 5 | - name: index_catalog 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: index_schema 9 | description: The name of the dataset that contains the vector index. 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the table that the vector index is created on. 13 | data_type: STRING 14 | - name: index_name 15 | description: The name of the vector index. 16 | data_type: STRING 17 | - name: option_name 18 | description: "The name of the option used in the data definition language statement\n\ 19 | \ (DDL) to create the vector index." 20 | data_type: STRING 21 | - name: option_type 22 | description: The option data type. 23 | data_type: STRING 24 | - name: option_value 25 | description: The option value. 26 | data_type: STRING 27 | -------------------------------------------------------------------------------- /models/information_schema/bi_engine/information_schema_bi_capacities.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_bi_capacities 4 | columns: 5 | - name: project_id 6 | description: "The project ID of the project that contains BI Engine\n capacity." 7 | data_type: STRING 8 | - name: project_number 9 | description: "The project number of the project that contains\n BI Engine\ 10 | \ capacity." 11 | data_type: INTEGER 12 | - name: bi_capacity_name 13 | description: "The name of the object. Currently there can only be one capacity\ 14 | \ per\n project, hence the name is always set to default." 15 | data_type: STRING 16 | - name: size 17 | description: BI Engine RAM in bytes 18 | data_type: INTEGER 19 | - name: preferred_tables 20 | description: "Set of preferred tables this BI Engine capacity must be\n \ 21 | \ used for. 
If set to null, BI Engine capacity\n is used for all queries\ 22 | \ in the current project" 23 | data_type: REPEATED STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_schemata_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_schemata_options 4 | columns: 5 | - name: CATALOG_NAME 6 | description: The name of the project that contains the dataset 7 | data_type: STRING 8 | - name: SCHEMA_NAME 9 | description: The name of the dataset, also referred to as the datasetId 10 | data_type: STRING 11 | - name: OPTION_NAME 12 | description: "The name of the option. For a list of supported options, see the\n\ 13 | \ schema options list.\n The storage_billing_model option is only\ 14 | \ displayed for\n datasets that have been updated after December 1, 2022.\ 15 | \ For datasets that\n were last updated before that date, the storage billing\ 16 | \ model is\n LOGICAL." 17 | data_type: STRING 18 | - name: OPTION_TYPE 19 | description: The data type of the option 20 | data_type: STRING 21 | - name: OPTION_VALUE 22 | description: The value of the option 23 | data_type: STRING 24 | -------------------------------------------------------------------------------- /models/information_schema/sessions/information_schema_sessions_by_project.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-sessions-by-project -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.SESSIONS_BY_PROJECT view, you need 4 | the bigquery.jobs.listAll Identity and Access Management (IAM) permission for the project. 5 | Each of the following predefined IAM roles includes the 6 | required permission: 7 | Project Owner 8 | BigQuery Admin 9 | For more information about BigQuery permissions, see 10 | Access control with IAM. -#} 11 | 12 | SELECT 13 | creation_time, 14 | expiration_time, 15 | is_active, 16 | last_modified_time, 17 | project_id, 18 | project_number, 19 | session_id, 20 | user_email 21 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SESSIONS_BY_PROJECT` 22 | -------------------------------------------------------------------------------- /models/information_schema/routines/information_schema_parameters.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-parameters -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.PARAMETERS view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.routines.get 6 | bigquery.routines.list 7 | Each of the following predefined IAM roles includes the 8 | permissions that you need to get routine metadata: 9 | roles/bigquery.admin 10 | roles/bigquery.metadataViewer 11 | roles/bigquery.dataViewer 12 | For more information about BigQuery permissions, see 13 | Access control with IAM. 
-#} 14 | 15 | SELECT 16 | specific_catalog, 17 | specific_schema, 18 | specific_name, 19 | ordinal_position, 20 | parameter_mode, 21 | is_result, 22 | parameter_name, 23 | data_type, 24 | parameter_default, 25 | is_aggregate 26 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`PARAMETERS` 27 | -------------------------------------------------------------------------------- /models/information_schema/vector_indexes/information_schema_vector_index_columns.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-vector-index-columns -#} 3 | {# Required role/permissions: To see vector index metadata, you need the 4 | bigquery.tables.get or bigquery.tables.list Identity and Access Management (IAM) 5 | permission on the table with the index. Each of the following predefined 6 | IAM roles includes at least one of these permissions: 7 | roles/bigquery.admin 8 | roles/bigquery.dataEditor 9 | roles/bigquery.dataOwner 10 | roles/bigquery.dataViewer 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.user 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. -#} 15 | 16 | SELECT 17 | index_catalog, 18 | index_schema, 19 | table_name, 20 | index_name, 21 | index_column_name, 22 | index_field_path 23 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`VECTOR_INDEX_COLUMNS` 24 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_column_field_paths.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-column-field-paths -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.COLUMN_FIELD_PATHS view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.admin 10 | roles/bigquery.dataViewer 11 | roles/bigquery.dataEditor 12 | roles/bigquery.metadataViewer 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. -#} 15 | 16 | SELECT 17 | table_catalog, 18 | table_schema, 19 | table_name, 20 | column_name, 21 | field_path, 22 | data_type, 23 | description, 24 | collation_name, 25 | rounding_mode, 26 | policy_tags 27 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`COLUMN_FIELD_PATHS` 28 | -------------------------------------------------------------------------------- /models/information_schema/vector_indexes/information_schema_vector_index_options.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-vector-index-options -#} 3 | {# Required role/permissions: To see vector index metadata, you need the 4 | bigquery.tables.get or bigquery.tables.list Identity and Access Management (IAM) 5 | permission on the table with the index. 
Each of the following predefined 6 | IAM roles includes at least one of these permissions: 7 | roles/bigquery.admin 8 | roles/bigquery.dataEditor 9 | roles/bigquery.dataOwner 10 | roles/bigquery.dataViewer 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.user 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. -#} 15 | 16 | SELECT 17 | index_catalog, 18 | index_schema, 19 | table_name, 20 | index_name, 21 | option_name, 22 | option_type, 23 | option_value 24 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`VECTOR_INDEX_OPTIONS` 25 | -------------------------------------------------------------------------------- /.changes/v0.20.0.md: -------------------------------------------------------------------------------- 1 | ## dbt-bigquery-monitoring v0.20.0 - June 22, 2025 2 | 3 | ### Features 4 | 5 | 6 | - Add total_bytes_processed to compute cost per hour model and upstream models 7 | 8 | - Ensure minute-to-hour model consistency by adding missing metrics to rollup and cost models, creating reservation_usage_per_hour model, and standardizing schema definitions across all intermediate models 9 | 10 | - Update materialization logic to return 'project_by_project_view' when explicit projects are provided, ensuring proper handling of project references in views 11 | 12 | - Add new models for BI Engine and job analysis, including materialized view effectiveness and job failure patterns 13 | 14 | - Add materialization for project_by_project_view in BigQuery to support multi-project monitoring scenarios 15 | 16 | ### Docs 17 | 18 | 19 | - Update the doc to make clearer how to run the package 20 | 21 | ### Under the Hood 22 | 23 | 24 | - Update the information schema fields to add new fields 25 | 26 | ### Contributors 27 | - [@Kayrnt](https://github.com/Kayrnt) 28 | 29 | -------------------------------------------------------------------------------- /models/information_schema/search_indexes/information_schema_search_index_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_search_index_options 4 | columns: 5 | - name: index_catalog 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: index_schema 9 | description: The name of the dataset that contains the index. 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the base table that the index is created on. 13 | data_type: STRING 14 | - name: index_name 15 | description: The name of the index. 16 | data_type: STRING 17 | - name: option_name 18 | description: "The name of the option, which can be one of the following:\n \ 19 | \ analyzer, analyzer_options,\n data_types, or\n default_index_column_granularity." 20 | data_type: STRING 21 | - name: option_type 22 | description: The type of the option. 23 | data_type: STRING 24 | - name: option_value 25 | description: The value of the option. 26 | data_type: STRING 27 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/slowest_jobs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: slowest_jobs 5 | description: > 6 | A model that stores information about the slowest jobs. 
7 | config: 8 | meta: 9 | label: "Slowest Jobs" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: hour 14 | description: The hour of the job execution. 15 | - name: project_id 16 | description: The project id of the job. 17 | - name: job_id 18 | description: The ID of the job. 19 | - name: query 20 | description: The SQL query executed by the job. 21 | - name: query_cost 22 | description: The cost of the query execution. 23 | - name: user_email 24 | description: The email of the user who initiated the job. 25 | - name: total_slot_ms 26 | description: The total number of slot time milliseconds used by the job. 27 | - name: total_run_time 28 | description: The total run time of the job in a human-readable format. 29 | -------------------------------------------------------------------------------- /models/information_schema/search_indexes/information_schema_search_index_column_options.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_search_index_column_options 4 | columns: 5 | - name: index_catalog 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: index_schema 9 | description: The name of the dataset that contains the index. 10 | data_type: STRING 11 | - name: table_name 12 | description: The name of the base table that the index is created on. 13 | data_type: STRING 14 | - name: index_name 15 | description: The name of the index. 16 | data_type: STRING 17 | - name: column_name 18 | description: The name of the indexed column that the option is set on. 19 | data_type: STRING 20 | - name: option_name 21 | description: The name of the option specified on the column. 22 | data_type: STRING 23 | - name: option_type 24 | description: The type of the option. 25 | data_type: STRING 26 | - name: option_value 27 | description: The value of the option. 28 | data_type: STRING 29 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docs_test.yml: -------------------------------------------------------------------------------- 1 | name: Docs Test deployment 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - "docs/**" # Only trigger when files in docs directory change 9 | 10 | jobs: 11 | test-deploy: 12 | name: Test deployment 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | - uses: actions/setup-node@v4 19 | with: 20 | node-version: 20 21 | 22 | - uses: oven-sh/setup-bun@v2 23 | with: 24 | bun-version: latest 25 | 26 | - name: Clone docs repository 27 | run: | 28 | git clone https://github.com/bqbooster/dbt-bigquery-monitoring-docs.git 29 | 30 | - name: Copy docs content 31 | run: | 32 | mkdir -p dbt-bigquery-monitoring-docs/docs 33 | cp -r docs/* dbt-bigquery-monitoring-docs/docs/ 34 | 35 | - name: Install dependencies and build 36 | run: | 37 | cd dbt-bigquery-monitoring-docs 38 | bun install 39 | bun run build 40 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_project_options_changes.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_project_options_changes 4 | columns: 5 | - name: update_time 6 | description: The time the configuration change occurred. 
7 | data_type: TIMESTAMP 8 | - name: username 9 | description: "For first-party users, it's their user email. For third-party users,\ 10 | \ it's\n the name that users set in the third-party identity provider." 11 | data_type: STRING 12 | - name: updated_options 13 | description: "A JSON object of the configuration options users updated in the\n\ 14 | \ change, containing the previous and the new values of updated fields." 15 | data_type: JSON 16 | - name: project_id 17 | description: "The project ID. This field is empty for organization-level\n \ 18 | \ configuration changes." 19 | data_type: STRING 20 | - name: project_number 21 | description: "The project number. This field is empty for the organization-level\n\ 22 | \ configuration changes." 23 | data_type: INTEGER 24 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/most_expensive_jobs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: most_expensive_jobs 5 | description: > 6 | A model that identifies the most expensive jobs. 7 | config: 8 | meta: 9 | label: "Most Expensive Jobs" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: hour 14 | description: The hour of the job execution. 15 | - name: project_id 16 | description: The project id of the job. 17 | - name: job_id 18 | description: The ID of the job. 19 | - name: query 20 | description: The SQL query executed by the job. 21 | - name: query_cost 22 | description: The cost of the job's query. 23 | - name: user_email 24 | description: The email of the user who initiated the job. 25 | - name: total_slot_ms 26 | description: The total number of slot time milliseconds used by the job. 27 | - name: total_slot_time 28 | description: The total number of slot time in human-readable format used by the job. 29 | -------------------------------------------------------------------------------- /models/information_schema/configuration/information_schema_organization_options_changes.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_organization_options_changes 4 | columns: 5 | - name: update_time 6 | description: The time the configuration change occurred. 7 | data_type: TIMESTAMP 8 | - name: username 9 | description: "For first-party users, it's their user email. For third-party users,\ 10 | \ it's\n the name that users set in the third-party identity provider." 11 | data_type: STRING 12 | - name: updated_options 13 | description: "A JSON object of the configuration options users updated in the\n\ 14 | \ change, containing the previous and the new values of updated fields." 15 | data_type: JSON 16 | - name: project_id 17 | description: "The project ID. This field is empty for organization-level\n \ 18 | \ configuration changes." 19 | data_type: STRING 20 | - name: project_number 21 | description: "The project number. This field is empty for the organization-level\n\ 22 | \ configuration changes." 
23 | data_type: INTEGER 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 kayrnt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /models/monitoring/compute/intermediate/cost/compute_cost_per_minute.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | on_schema_change='append_new_columns', 6 | partition_by={ 7 | "field": "minute", 8 | "granularity": "hour", 9 | "data_type": "timestamp", 10 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 11 | }, 12 | cluster_by = ['minute', 'project_id'], 13 | partition_expiration_days = dbt_bigquery_monitoring_variable_output_partition_expiration_days() 14 | ) 15 | }} 16 | SELECT 17 | minute, 18 | project_id, 19 | reservation_id, 20 | SUM(ROUND(total_query_cost, 2)) AS total_query_cost, 21 | SUM(ROUND(failing_query_cost, 2)) AS failing_query_cost, 22 | SUM(total_bytes_processed) AS total_bytes_processed, 23 | SUM(total_slot_ms) AS total_slot_ms, 24 | SUM(query_count) AS query_count, 25 | STRUCT( 26 | SUM(job_state.done) AS done, 27 | SUM(job_state.running) AS running, 28 | SUM(job_state.pending) AS pending 29 | ) AS job_state 30 | FROM {{ ref('compute_rollup_per_minute') }} 31 | GROUP BY ALL 32 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_schemata.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_schemata 4 | columns: 5 | - name: CATALOG_NAME 6 | description: The name of the project that contains the dataset 7 | data_type: STRING 8 | - name: SCHEMA_NAME 9 | description: The dataset's name also referred to as the datasetId 10 | data_type: STRING 11 | - name: SCHEMA_OWNER 12 | description: The value is always NULL 13 | data_type: STRING 14 | - name: CREATION_TIME 15 | description: The dataset's creation time 16 | data_type: TIMESTAMP 17 | - name: LAST_MODIFIED_TIME 18 | description: The dataset's last modified time 19 | data_type: TIMESTAMP 20 | - name: LOCATION 21 | description: The dataset's geographic location 22 | data_type: 
STRING 23 | - name: DDL 24 | description: "The CREATE SCHEMA\n DDL statement that can be used to create\ 25 | \ the dataset" 26 | data_type: STRING 27 | - name: DEFAULT_COLLATION_NAME 28 | description: "The name of the default collation specification\n if it exists;\ 29 | \ otherwise, NULL." 30 | data_type: STRING 31 | -------------------------------------------------------------------------------- /models/monitoring/compute/intermediate/user/users_costs_incremental.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | partition_by = { 6 | "field": "hour", 7 | "data_type": "timestamp", 8 | "granularity": "hour", 9 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 10 | }, 11 | cluster_by = ["user_email"], 12 | partition_expiration_days = dbt_bigquery_monitoring_variable_lookback_window_days() 13 | ) 14 | }} 15 | SELECT 16 | hour, 17 | user_email, 18 | SUM(query_cost) AS total_query_cost, 19 | SUM(total_slot_ms) AS total_slot_ms, 20 | COUNT(*) AS query_count, 21 | COUNTIF(cache_hit) AS cache_hit, 22 | COUNTIF(error_result IS NOT NULL) AS failed_queries, 23 | COUNT(DISTINCT project_id) AS projects_used, 24 | COUNT(DISTINCT reservation_id) AS reservations_used, 25 | AVG(total_time_seconds) AS avg_duration_seconds, 26 | SUM(total_bytes_processed) AS total_bytes_processed, 27 | -- Job type breakdown 28 | APPROX_TOP_SUM(job_type, 1, 20) AS job_types 29 | FROM {{ jobs_done_incremental_hourly() }} 30 | GROUP BY hour, user_email 31 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/most_repeated_jobs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: most_repeated_jobs 5 | description: > 6 | A model that stores information about the most repeated jobs. 7 | config: 8 | meta: 9 | label: "Most Repeated Jobs" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: query 14 | description: The SQL query. 15 | - name: project_ids 16 | description: The top aggregated project IDs. 17 | - name: reservation_ids 18 | description: The top aggregated reservation IDs. 19 | - name: user_emails 20 | description: The top aggregated user emails. 21 | - name: cache_hit_ratio 22 | description: The ratio of cache hits. 23 | - name: total_query_cost 24 | description: The total query cost. 25 | - name: total_slot_ms 26 | description: The total number of slot time milliseconds. 27 | - name: total_slot_time 28 | description: The total slot time in human-readable format. 29 | - name: query_count 30 | description: The total number of times the query was executed.
31 | -------------------------------------------------------------------------------- /models/monitoring/compute/intermediate/cost/compute_cost_per_hour.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | on_schema_change='append_new_columns', 6 | partition_by={ 7 | "field": "hour", 8 | "granularity": "day", 9 | "data_type": "timestamp", 10 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 11 | }, 12 | cluster_by = ['hour', 'project_id'], 13 | partition_expiration_days = dbt_bigquery_monitoring_variable_output_partition_expiration_days() 14 | ) 15 | }} 16 | SELECT 17 | TIMESTAMP_TRUNC(MINUTE, HOUR) AS hour, 18 | project_id, 19 | reservation_id, 20 | SUM(ROUND(total_query_cost, 2)) AS total_query_cost, 21 | SUM(ROUND(failing_query_cost, 2)) AS failing_query_cost, 22 | SUM(total_bytes_processed) AS total_bytes_processed, 23 | SUM(total_slot_ms) AS total_slot_ms, 24 | SUM(query_count) AS query_count, 25 | STRUCT( 26 | SUM(job_state.done) AS done, 27 | SUM(job_state.running) AS running, 28 | SUM(job_state.pending) AS pending 29 | ) AS job_state 30 | FROM {{ ref("compute_cost_per_minute") }} 31 | GROUP BY ALL 32 | -------------------------------------------------------------------------------- /models/information_schema/routines/information_schema_routines.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-routines -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.ROUTINES view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.routines.get 6 | bigquery.routines.list 7 | Each of the following predefined IAM roles includes the 8 | permissions that you need in order to get routine metadata: 9 | roles/bigquery.admin 10 | roles/bigquery.metadataViewer 11 | roles/bigquery.dataViewer 12 | For more information about BigQuery permissions, see 13 | Access control with IAM. -#} 14 | 15 | SELECT 16 | specific_catalog, 17 | specific_schema, 18 | specific_name, 19 | routine_catalog, 20 | routine_schema, 21 | routine_name, 22 | routine_type, 23 | data_type, 24 | routine_body, 25 | routine_definition, 26 | external_language, 27 | is_deterministic, 28 | security_type, 29 | created, 30 | last_altered, 31 | ddl, 32 | connection 33 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`ROUTINES` 34 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_constraint_column_usage.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_constraint_column_usage 4 | columns: 5 | - name: TABLE_CATALOG 6 | description: The name of the project that contains the dataset. 7 | data_type: STRING 8 | - name: TABLE_SCHEMA 9 | description: "The name of the dataset that contains the table. Also\n referred\ 10 | \ to as the datasetId." 11 | data_type: STRING 12 | - name: TABLE_NAME 13 | description: "The name of the table. Also referred to as the\n tableId." 14 | data_type: STRING 15 | - name: COLUMN_NAME 16 | description: The column name. 
17 | data_type: STRING 18 | - name: CONSTRAINT_CATALOG 19 | description: The constraint project name. 20 | data_type: STRING 21 | - name: CONSTRAINT_SCHEMA 22 | description: The constraint dataset name. 23 | data_type: STRING 24 | - name: CONSTRAINT_NAME 25 | description: "The constraint name. It can be the name of the\n primary key\ 26 | \ if the column is used by the primary key or the name of the\n foreign key\ 27 | \ if the column is used by a foreign key." 28 | data_type: STRING 29 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/dbt/most_repeated_models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: most_repeated_models 5 | description: > 6 | A model that stores information about the most repeated dbt models. 7 | config: 8 | meta: 9 | label: "Most Repeated Models" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: dbt_model_name 14 | description: The name of the DBT model. 15 | - name: project_ids 16 | description: The top aggregated project IDs. 17 | - name: reservation_ids 18 | description: The top aggregated reservation IDs. 19 | - name: user_emails 20 | description: The top aggregated user emails. 21 | - name: cache_hit_ratio 22 | description: The ratio of cache hits. 23 | - name: total_query_cost 24 | description: The total query cost. 25 | - name: total_slot_ms 26 | description: The total number of slot time milliseconds. 27 | - name: total_slot_time 28 | description: The total slot time in human-readable format. 29 | - name: amount 30 | description: The total number of occurrences. 31 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/billing/storage_billing_per_hour.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | on_schema_change='append_new_columns', 6 | partition_by={ 7 | "field": "hour", 8 | "data_type": "timestamp", 9 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 10 | }, 11 | enabled = dbt_bigquery_monitoring_variable_enable_gcp_billing_export(), 12 | partition_expiration_days = dbt_bigquery_monitoring_variable_output_partition_expiration_days() 13 | ) 14 | }} 15 | SELECT 16 | TIMESTAMP_TRUNC(usage_start_time, HOUR) AS hour, 17 | sku.description AS storage_type, 18 | COALESCE(SUM(cost), 0) AS storage_cost, 19 | {{ currency_to_symbol('currency') }} AS currency_symbol 20 | FROM {{ ref('gcp_billing_export_resource_v1') }} 21 | WHERE 22 | (service.description LIKE '%BigQuery%' 23 | AND LOWER(sku.description) LIKE '%storage%') 24 | {% if is_incremental() %} 25 | AND TIMESTAMP_TRUNC(usage_start_time, HOUR) >= TIMESTAMP_SUB(_dbt_max_partition, INTERVAL {{ dbt_bigquery_monitoring_variable_lookback_incremental_billing_window_days() }} DAY) 26 | {% endif %} 27 | GROUP BY ALL 28 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_bigquery_monitoring_tests' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | profile: dbt_bigquery_monitoring 6 | 7 | model-paths: ["models"] 8 | 9 | flags: 10 | send_anonymous_usage_stats: False 11 | use_colors: True 12 | 13 | clean-targets: 14 | - target 15 | - dbt_packages 16
| 17 | models: 18 | +start: Jan 1 2017 19 | 20 | vars: 21 | # We are using DBT_BQ_MONITORING_GCP_PROJECTS to set input_gcp_projects 22 | output_partition_expiration_days: 1 23 | lookback_window_days: 1 24 | # billing logs 25 | dbt_bigquery_monitoring_variable_enable_gcp_billing_export: true 26 | gcp_billing_export_storage_project: '{{ target.project }}' 27 | gcp_billing_export_dataset: '{{ target.dataset }}' 28 | gcp_billing_export_table: 'gcp_billing_export_fixture' 29 | # audit logs 30 | # We are using DBT_BQ_MONITORING_GCP_BIGQUERY_AUDIT_LOGS to set enable_gcp_bigquery_audit_logs 31 | gcp_bigquery_audit_logs_storage_project: '{{ target.project }}' 32 | gcp_bigquery_audit_logs_dataset: '{{ target.dataset }}' 33 | gcp_bigquery_audit_logs_table: 'gcp_bigquery_audit_logs_fixture' 34 | 35 | query-comment: 36 | comment: '{{ dbt_bigquery_monitoring.get_query_comment(node) }}' 37 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_columns.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-columns -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.COLUMNS view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.admin 10 | roles/bigquery.dataViewer 11 | roles/bigquery.dataEditor 12 | roles/bigquery.metadataViewer 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. 
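An illustrative downstream query, as a minimal sketch (the `my-project.dbt_bigquery_monitoring` output location is hypothetical):
-- surface the widest tables, which are often candidates for schema cleanup
SELECT table_schema, table_name, COUNT(*) AS column_count
FROM `my-project.dbt_bigquery_monitoring.information_schema_columns`
GROUP BY table_schema, table_name
ORDER BY column_count DESC
LIMIT 20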
-#} 15 | 16 | SELECT 17 | table_catalog, 18 | table_schema, 19 | table_name, 20 | column_name, 21 | ordinal_position, 22 | is_nullable, 23 | data_type, 24 | is_generated, 25 | generation_expression, 26 | is_stored, 27 | is_hidden, 28 | is_updatable, 29 | is_system_defined, 30 | is_partitioning_column, 31 | clustering_ordinal_position, 32 | collation_name, 33 | column_default, 34 | rounding_mode, 35 | policy_tags 36 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`COLUMNS` 37 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/dbt/most_expensive_models.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | 7 | WITH model_aggregates AS ( 8 | SELECT 9 | dbt_model_name, 10 | ARRAY_CONCAT_AGG(project_ids) AS aggregated_project_ids, 11 | ARRAY_CONCAT_AGG(reservation_ids) AS aggregated_reservation_ids, 12 | ARRAY_CONCAT_AGG(user_emails) AS aggregated_user_emails, 13 | SUM(cache_hit) / SUM(query_count) AS cache_hit_ratio, 14 | SUM(ROUND(total_query_cost, 2)) AS total_query_cost, 15 | SUM(total_slot_ms) AS total_slot_ms, 16 | SUM(query_count) AS query_count, 17 | SUM(cache_hit) AS cache_hit 18 | FROM 19 | {{ ref('models_costs_incremental') }} 20 | GROUP BY dbt_model_name 21 | ) 22 | 23 | SELECT 24 | dbt_model_name, 25 | {{ top_sum_from_count('aggregated_project_ids') }} AS project_ids, 26 | {{ top_sum_from_count('aggregated_reservation_ids') }} AS reservation_ids, 27 | {{ top_sum_from_count('aggregated_user_emails') }} AS user_emails, 28 | cache_hit_ratio, 29 | total_query_cost, 30 | total_slot_ms, 31 | query_count, 32 | cache_hit 33 | FROM model_aggregates 34 | ORDER BY total_query_cost DESC 35 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 36 | -------------------------------------------------------------------------------- /models/information_schema/vector_indexes/information_schema_vector_indexes.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-vector-indexes -#} 3 | {# Required role/permissions: To see vector index metadata, you need the 4 | bigquery.tables.get or bigquery.tables.list Identity and Access Management (IAM) 5 | permission on the table with the index. Each of the following predefined 6 | IAM roles includes at least one of these permissions: 7 | roles/bigquery.admin 8 | roles/bigquery.dataEditor 9 | roles/bigquery.dataOwner 10 | roles/bigquery.dataViewer 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.user 13 | For more information about BigQuery permissions, see 14 | Access control with IAM. 
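An illustrative downstream query, as a minimal sketch (the `my-project.dbt_bigquery_monitoring` output location is hypothetical):
-- list vector indexes that have not finished building over their base table
SELECT index_schema, table_name, index_name, coverage_percentage, unindexed_row_count
FROM `my-project.dbt_bigquery_monitoring.information_schema_vector_indexes`
WHERE coverage_percentage < 100
ORDER BY unindexed_row_count DESC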
-#} 15 | 16 | SELECT 17 | index_catalog, 18 | index_schema, 19 | table_name, 20 | index_name, 21 | index_status, 22 | creation_time, 23 | last_modification_time, 24 | last_refresh_time, 25 | disable_time, 26 | disable_reason, 27 | ddl, 28 | coverage_percentage, 29 | unindexed_row_count, 30 | total_logical_bytes, 31 | total_storage_bytes 32 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`VECTOR_INDEXES` 33 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_schemata_replicas.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-schemata-replicas -#} 3 | {# Required role/permissions: To get the permissions that you need to query the INFORMATION_SCHEMA.SCHEMATA_REPLICAS view, 4 | ask your administrator to grant you the BigQuery Data Viewer (roles/bigquery.dataViewer) IAM role on the project. 5 | For more information about granting roles, see Manage access to projects, folders, and organizations. 6 | You might also be able to get the required permissions through custom roles or other predefined roles. 7 | -#} 8 | 9 | SELECT 10 | catalog_name, 11 | schema_name, 12 | replica_name, 13 | location, 14 | replica_primary_assigned, 15 | replica_primary_assignment_complete, 16 | creation_time, 17 | creation_complete, 18 | replication_time, 19 | sync_status 20 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA_REPLICAS` 21 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/dbt/most_repeated_models.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table', 4 | ) 5 | }} 6 | 7 | WITH model_aggregates AS ( 8 | SELECT 9 | dbt_model_name, 10 | ARRAY_CONCAT_AGG(project_ids) AS aggregated_project_ids, 11 | ARRAY_CONCAT_AGG(reservation_ids) AS aggregated_reservation_ids, 12 | ARRAY_CONCAT_AGG(user_emails) AS aggregated_user_emails, 13 | SUM(cache_hit) / SUM(query_count) AS cache_hit_ratio, 14 | SUM(ROUND(total_query_cost, 2)) AS total_query_cost, 15 | SUM(total_slot_ms) AS total_slot_ms, 16 | SUM(query_count) AS query_count, 17 | SUM(cache_hit) AS cache_hit 18 | FROM 19 | {{ ref('models_costs_incremental') }} 20 | GROUP BY dbt_model_name 21 | ) 22 | 23 | SELECT 24 | dbt_model_name, 25 | {{ top_sum_from_count('aggregated_project_ids') }} AS project_ids, 26 | {{ top_sum_from_count('aggregated_reservation_ids') }} AS reservation_ids, 27 | {{ top_sum_from_count('aggregated_user_emails') }} AS user_emails, 28 | cache_hit_ratio, 29 | total_query_cost, 30 | total_slot_ms, 31 | query_count, 32 | cache_hit 33 | FROM model_aggregates 34 | WHERE query_count > 1 35 | ORDER BY query_count DESC 36 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 37 | -------------------------------------------------------------------------------- /macros/materialization_project_by_project_view.sql: -------------------------------------------------------------------------------- 1 | {%- materialization project_by_project_view, adapter='bigquery' -%} 2 | 3 | -- grab the current table's grants config for comparison later
on 4 | {% set grant_config = config.get('grants') %} 5 | {% set projects = project_list() %} 6 | 7 | -- Build the SQL by unioning all projects 8 | {% set union_sqls = [] %} 9 | {% for project in projects %} 10 | {% set project_sql = sql | replace('`region-', '`' ~ project | trim ~ '`.`region-') %} 11 | {% do union_sqls.append('(' ~ project_sql ~ ')') %} 12 | {% endfor %} 13 | 14 | {% set final_sql = union_sqls | join('\nUNION ALL\n') %} 15 | 16 | {% call statement('main') -%} 17 | CREATE OR REPLACE VIEW `{{ this.database }}`.`{{ this.schema }}`.`{{ this.identifier }}` 18 | AS ( 19 | {{ final_sql }} 20 | ) 21 | {%- endcall %} 22 | 23 | {% set target_relation = this.incorporate(type='view') %} 24 | 25 | {% do persist_docs(target_relation, model) %} 26 | 27 | {% if config.get('grant_access_to') %} 28 | {% for grant_target_dict in config.get('grant_access_to') %} 29 | {% do adapter.grant_access_to(this, 'view', None, grant_target_dict) %} 30 | {% endfor %} 31 | {% endif %} 32 | 33 | {{ return({'relations': [target_relation]}) }} 34 | 35 | {%- endmaterialization -%} 36 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_schemata_links.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_schemata_links 4 | columns: 5 | - name: CATALOG_NAME 6 | description: The name of the project that contains the source dataset. 7 | data_type: STRING 8 | - name: SCHEMA_NAME 9 | description: "The name of the source dataset. The dataset name is also referred\ 10 | \ to as\n the datasetId." 11 | data_type: STRING 12 | - name: LINKED_SCHEMA_CATALOG_NUMBER 13 | description: The project number of the project that contains the linked dataset. 14 | data_type: STRING 15 | - name: LINKED_SCHEMA_CATALOG_NAME 16 | description: The project name of the project that contains the linked dataset. 17 | data_type: STRING 18 | - name: LINKED_SCHEMA_NAME 19 | description: "The name of the linked dataset. The dataset name is also referred\ 20 | \ to as\n the datasetId." 21 | data_type: STRING 22 | - name: LINKED_SCHEMA_CREATION_TIME 23 | description: The time when the linked dataset was created. 24 | data_type: TIMESTAMP 25 | - name: LINKED_SCHEMA_ORG_DISPLAY_NAME 26 | description: The display name of the organization in which the linked dataset 27 | is created.
28 | data_type: STRING 29 | -------------------------------------------------------------------------------- /.github/workflows/build_base_image.yml: -------------------------------------------------------------------------------- 1 | name: Build base image for CI 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - 'Dockerfile' 9 | - 'Makefile' 10 | workflow_dispatch: 11 | 12 | # GitHub secrets 13 | env: 14 | DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} 15 | IMAGE_NAME: dbt-bigquery-monitoring-base 16 | BRANCH_NAME: ${{ github.head_ref || github.ref_name }} 17 | 18 | jobs: 19 | build-base-image: 20 | name: Build base image 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | with: 27 | ref: ${{ github.event.pull_request.head.sha }} # Check out the code of the PR 28 | 29 | - name: Log in to Docker Hub 30 | run: | 31 | echo ${{ secrets.DOCKER_REGISTRY_PASSWORD }} | docker login ${{ secrets.DOCKER_REGISTRY }} -u ${{ secrets.DOCKER_REGISTRY_USER }} --password-stdin 32 | 33 | - name: Build base image 34 | run: | 35 | docker build -t $IMAGE_NAME:$BRANCH_NAME . 36 | 37 | - name: Push base image 38 | run: | 39 | docker tag $IMAGE_NAME:$BRANCH_NAME $DOCKER_REGISTRY/$IMAGE_NAME:$BRANCH_NAME 40 | docker push $DOCKER_REGISTRY/$IMAGE_NAME:$BRANCH_NAME 41 | 42 | -------------------------------------------------------------------------------- /models/monitoring/compute/intermediate/billing/compute_billing_per_hour.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy = 'insert_overwrite', 5 | on_schema_change='append_new_columns', 6 | partition_by={ 7 | "field": "hour", 8 | "data_type": "timestamp", 9 | "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions() 10 | }, 11 | enabled = dbt_bigquery_monitoring_variable_enable_gcp_billing_export(), 12 | partition_expiration_days = dbt_bigquery_monitoring_variable_output_partition_expiration_days() 13 | ) 14 | }} 15 | SELECT 16 | TIMESTAMP_TRUNC(usage_start_time, HOUR) AS hour, 17 | sku.description AS compute_type, 18 | COALESCE(SUM(cost), 0) AS compute_cost, 19 | {{ currency_to_symbol('currency') }} AS currency_symbol 20 | FROM {{ ref('gcp_billing_export_resource_v1') }} 21 | WHERE 22 | ((service.description = 'BigQuery' AND LOWER(sku.description) LIKE '%analysis%') 23 | OR (service.description IN ('BigQuery Reservation API', 'BigQuery BI Engine'))) 24 | {% if is_incremental() %} 25 | AND TIMESTAMP_TRUNC(usage_start_time, HOUR) >= TIMESTAMP_SUB(_dbt_max_partition, INTERVAL {{ dbt_bigquery_monitoring_variable_lookback_incremental_billing_window_days() }} DAY) 26 | {% endif %} 27 | GROUP BY ALL 28 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage_usage_timeline.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage-usage -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_STORAGE_USAGE_TIMELINE view, you need the 4 | following Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | 
roles/bigquery.dataViewer 10 | roles/bigquery.dataEditor 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.admin 13 | For queries with a region qualifier, you must have permissions for the project. For more information about BigQuery permissions, see 14 | Access control with IAM. -#} 15 | 16 | SELECT 17 | project_id, 18 | table_catalog, 19 | project_number, 20 | table_schema, 21 | table_name, 22 | billable_total_logical_usage, 23 | billable_active_logical_usage, 24 | billable_long_term_logical_usage, 25 | billable_total_physical_usage, 26 | billable_active_physical_usage, 27 | billable_long_term_physical_usage 28 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_USAGE_TIMELINE` 29 | -------------------------------------------------------------------------------- /models/information_schema/search_indexes/information_schema_search_indexes_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-indexes-by-organization -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.SEARCH_INDEXES_BY_ORGANIZATION view, you need 4 | the following Identity and Access Management (IAM) permissions for your organization: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.admin 10 | roles/bigquery.dataViewer 11 | roles/bigquery.dataEditor 12 | roles/bigquery.metadataViewer 13 | This schema view is only available to users with defined 14 | Google Cloud organizations. For more information about BigQuery permissions, see 15 | Access control with IAM. -#} 16 | 17 | SELECT 18 | project_id, 19 | project_number, 20 | index_catalog, 21 | index_schema, 22 | table_name, 23 | index_name, 24 | index_status, 25 | index_status_details, 26 | use_background_reservation 27 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SEARCH_INDEXES_BY_ORGANIZATION` 28 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_tables.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-tables -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLES view, you need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | bigquery.routines.get 8 | bigquery.routines.list 9 | Each of the following predefined IAM roles includes the preceding 10 | permissions: 11 | roles/bigquery.admin 12 | roles/bigquery.dataViewer 13 | roles/bigquery.metadataViewer 14 | For more information about BigQuery permissions, see 15 | Access control with IAM.
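An illustrative downstream query, as a minimal sketch (the `my-project.dbt_bigquery_monitoring` output location is hypothetical):
-- break down object counts per dataset by table type
SELECT table_schema, table_type, COUNT(*) AS object_count
FROM `my-project.dbt_bigquery_monitoring.information_schema_tables`
GROUP BY table_schema, table_type
ORDER BY object_count DESC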
-#} 16 | 17 | SELECT 18 | table_catalog, 19 | table_schema, 20 | table_name, 21 | table_type, 22 | is_insertable_into, 23 | is_typed, 24 | is_change_history_enabled, 25 | creation_time, 26 | base_table_catalog, 27 | base_table_schema, 28 | base_table_name, 29 | snapshot_time_ms, 30 | replica_source_catalog, 31 | replica_source_schema, 32 | replica_source_name, 33 | replication_status, 34 | replication_error, 35 | ddl, 36 | default_collation_name, 37 | upsert_stream_apply_watermark 38 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLES` 39 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/dbt/most_expensive_models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: most_expensive_models 5 | description: > 6 | A model that aggregates data on the most expensive dbt models. 7 | config: 8 | meta: 9 | label: "Most Expensive Models" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: dbt_model_name 14 | description: The name of the DBT model. 15 | - name: project_ids 16 | description: The top aggregated project IDs associated with the model. 17 | - name: reservation_ids 18 | description: The top aggregated reservation IDs associated with the model. 19 | - name: user_emails 20 | description: The top aggregated user emails associated with the model. 21 | - name: cache_hit_ratio 22 | description: The cache hit ratio for the model. 23 | - name: total_query_cost 24 | description: The total query cost for the model. 25 | - name: total_slot_ms 26 | description: The total slot time milliseconds for the model. 27 | - name: total_slot_time 28 | description: The total slot time in human-readable format for the model. 29 | - name: query_count 30 | description: The total query count for the model. 31 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_constraints.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-constraints -#} 3 | {# Required role/permissions: You need the following 4 | Identity and Access Management (IAM) permissions: 5 | bigquery.tables.get for viewing primary and foreign key definitions. 6 | bigquery.tables.list for viewing table information schemas. 7 | Each of the following 8 | predefined roles 9 | has the needed permissions to perform the workflows detailed in this document: 10 | roles/bigquery.dataEditor 11 | roles/bigquery.dataOwner 12 | roles/bigquery.admin 13 | Note: Roles are presented in ascending order of permissions granted. We 14 | recommend that you use predefined roles from earlier in the list to not allocate 15 | excess permissions.For more information about IAM roles and permissions in 16 | BigQuery, see 17 | Predefined roles and permissions. 
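An illustrative downstream query, as a minimal sketch (the `my-project.dbt_bigquery_monitoring` output location is hypothetical):
-- list tables that declare a primary key (BigQuery constraints are not enforced)
SELECT table_schema, table_name, constraint_name
FROM `my-project.dbt_bigquery_monitoring.information_schema_table_constraints`
WHERE constraint_type = 'PRIMARY KEY'
ORDER BY table_schema, table_name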
-#} 18 | 19 | SELECT 20 | constraint_catalog, 21 | constraint_schema, 22 | constraint_name, 23 | table_catalog, 24 | table_schema, 25 | table_name, 26 | constraint_type, 27 | is_deferrable, 28 | initially_deferred, 29 | enforced 30 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_CONSTRAINTS` 31 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/user/most_expensive_users.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: most_expensive_users 5 | description: > 6 | A model that calculates the most expensive users based on query cost. 7 | config: 8 | meta: 9 | label: "Most Expensive Users" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: day 14 | description: The day of the data. 15 | - name: user_email 16 | description: The email address of the user. 17 | - name: avg_query_cost 18 | description: The average query cost per user and day. 19 | - name: total_query_cost 20 | description: The total query cost for the user and day. 21 | - name: total_slot_ms 22 | description: The total slot milliseconds used by the user's queries and day. 23 | - name: total_slot_time 24 | description: The total slot time in human-readable format used by the user's queries and day. 25 | - name: query_count 26 | description: The total number of queries run by the user and day. 27 | - name: total_slot_seconds_per_query 28 | description: The total slot seconds per query used by the user and day. 29 | - name: cache_hit_ratio 30 | description: The cache hit ratio for the user and day. 31 | -------------------------------------------------------------------------------- /models/information_schema/jobs_timeline/information_schema_jobs_timeline.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'job_creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.JOBS_TIMELINE view, you need the 4 | bigquery.jobs.listAll Identity and Access Management (IAM) permission for the project. 5 | Each of the following predefined IAM roles includes the required 6 | permission: 7 | Project Owner 8 | BigQuery Admin 9 | For more information about BigQuery permissions, see 10 | Access control with IAM. 
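As an illustrative sketch of downstream usage (the `my-project.dbt_bigquery_monitoring` output location is hypothetical): because period_slot_ms records the slot-milliseconds consumed in each period, summing it over an hour and dividing by 3,600,000 ms approximates the average slot usage for that hour:
SELECT TIMESTAMP_TRUNC(period_start, HOUR) AS hour, SUM(period_slot_ms) / 3600000 AS avg_slot_usage
FROM `my-project.dbt_bigquery_monitoring.information_schema_jobs_timeline`
GROUP BY hour
ORDER BY hour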
-#} 11 | 12 | SELECT 13 | period_start, 14 | period_slot_ms, 15 | project_id, 16 | project_number, 17 | user_email, 18 | job_id, 19 | job_type, 20 | statement_type, 21 | priority, 22 | parent_job_id, 23 | job_creation_time, 24 | job_start_time, 25 | job_end_time, 26 | state, 27 | reservation_id, 28 | edition, 29 | total_bytes_billed, 30 | total_bytes_processed, 31 | error_result, 32 | cache_hit, 33 | period_shuffle_ram_usage_ratio, 34 | period_estimated_runnable_units, 35 | transaction_id 36 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_TIMELINE` 37 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_constraints.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_table_constraints 4 | columns: 5 | - name: CONSTRAINT_CATALOG 6 | description: The constraint project name. 7 | data_type: STRING 8 | - name: CONSTRAINT_SCHEMA 9 | description: The constraint dataset name. 10 | data_type: STRING 11 | - name: CONSTRAINT_NAME 12 | description: The constraint name. 13 | data_type: STRING 14 | - name: TABLE_CATALOG 15 | description: The constrained table project name. 16 | data_type: STRING 17 | - name: TABLE_SCHEMA 18 | description: The constrained table dataset name. 19 | data_type: STRING 20 | - name: TABLE_NAME 21 | description: The constrained table name. 22 | data_type: STRING 23 | - name: CONSTRAINT_TYPE 24 | description: 'Either PRIMARY KEY or 25 | 26 | FOREIGN KEY.' 27 | data_type: STRING 28 | - name: IS_DEFERRABLE 29 | description: "YES or NO depending on if a constraint is\n deferrable. Only\ 30 | \ NO is supported." 31 | data_type: STRING 32 | - name: INITIALLY_DEFERRED 33 | description: Only NO is supported. 34 | data_type: STRING 35 | - name: ENFORCED 36 | description: "YES or NO depending on if the constraint is\nenforced. \nOnly NO\ 37 | \ is supported." 38 | data_type: STRING 39 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_assignments.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_assignments 4 | columns: 5 | - name: ddl 6 | description: The DDL statement used to create this assignment. 7 | data_type: STRING 8 | - name: project_id 9 | description: ID of the administration project. 10 | data_type: STRING 11 | - name: project_number 12 | description: Number of the administration project. 13 | data_type: INTEGER 14 | - name: assignment_id 15 | description: ID that uniquely identifies the assignment. 16 | data_type: STRING 17 | - name: reservation_name 18 | description: Name of the reservation that the assignment uses. 19 | data_type: STRING 20 | - name: job_type 21 | description: "The type of job that can use the reservation. Can be\n PIPELINE,\ 22 | \ QUERY, CONTINUOUS,\n ML_EXTERNAL, or BACKGROUND." 23 | data_type: STRING 24 | - name: assignee_id 25 | description: ID that uniquely identifies the assignee resource. 26 | data_type: STRING 27 | - name: assignee_number 28 | description: Number that uniquely identifies the assignee resource. 29 | data_type: INTEGER 30 | - name: assignee_type 31 | description: "Type of assignee resource. Can be organization,\n folder\ 32 | \ or project." 
33 | data_type: STRING 34 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_key_column_usage.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_key_column_usage 4 | columns: 5 | - name: CONSTRAINT_CATALOG 6 | description: The constraint project name. 7 | data_type: STRING 8 | - name: CONSTRAINT_SCHEMA 9 | description: The constraint dataset name. 10 | data_type: STRING 11 | - name: CONSTRAINT_NAME 12 | description: The constraint name. 13 | data_type: STRING 14 | - name: TABLE_CATALOG 15 | description: The project name of the constrained table. 16 | data_type: STRING 17 | - name: TABLE_SCHEMA 18 | description: The name of the constrained table dataset. 19 | data_type: STRING 20 | - name: TABLE_NAME 21 | description: The name of the constrained table. 22 | data_type: STRING 23 | - name: COLUMN_NAME 24 | description: The name of the constrained column. 25 | data_type: STRING 26 | - name: ORDINAL_POSITION 27 | description: 'The ordinal position of the column within the constraint key 28 | 29 | (starting at 1).' 30 | data_type: INT64 31 | - name: POSITION_IN_UNIQUE_CONSTRAINT 32 | description: 'For foreign keys, the ordinal position of the column within the 33 | 34 | primary key constraint (starting at 1). This value is NULL 35 | 36 | for primary key constraints.' 37 | data_type: INT64 38 | -------------------------------------------------------------------------------- /models/information_schema/views/information_schema_materialized_views.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_materialized_views 4 | columns: 5 | - name: TABLE_CATALOG 6 | description: "The name of the project that contains the dataset. Also referred\ 7 | \ to\n as the projectId." 8 | data_type: STRING 9 | - name: TABLE_SCHEMA 10 | description: "The name of the dataset that contains the materialized view. Also\n\ 11 | \ referred to as the datasetId." 12 | data_type: STRING 13 | - name: TABLE_NAME 14 | description: "The name of the materialized view. Also referred to as the\n \ 15 | \ tableId." 16 | data_type: STRING 17 | - name: LAST_REFRESH_TIME 18 | description: The time when this materialized view was last refreshed. 19 | data_type: TIMESTAMP 20 | - name: REFRESH_WATERMARK 21 | description: "The refresh watermark of the materialized view. The data contained\ 22 | \ in\n materialized view base tables up to this time are included in\ 23 | \ the\n materialized view cache." 24 | data_type: TIMESTAMP 25 | - name: LAST_REFRESH_STATUS 26 | description: "Error result of the last automatic refresh job as an ErrorProto\n\ 27 | \ object. If present, indicates that the last automatic refresh was unsuccessful." 
28 | data_type: RECORD 29 | -------------------------------------------------------------------------------- /models/information_schema/jobs_timeline/information_schema_jobs_timeline_by_user.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'job_creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline-by-user -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.JOBS_TIMELINE_BY_USER view, you need the 4 | bigquery.jobs.list Identity and Access Management (IAM) permission for the project. 5 | Each of the following predefined IAM roles includes the required 6 | permission: 7 | Project Viewer 8 | BigQuery User 9 | For more information about BigQuery permissions, see 10 | Access control with IAM. -#} 11 | 12 | SELECT 13 | period_start, 14 | period_slot_ms, 15 | project_id, 16 | project_number, 17 | user_email, 18 | job_id, 19 | job_type, 20 | statement_type, 21 | priority, 22 | parent_job_id, 23 | job_creation_time, 24 | job_start_time, 25 | job_end_time, 26 | state, 27 | reservation_id, 28 | edition, 29 | total_bytes_billed, 30 | total_bytes_processed, 31 | error_result, 32 | cache_hit, 33 | period_shuffle_ram_usage_ratio, 34 | period_estimated_runnable_units, 35 | transaction_id 36 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_TIMELINE_BY_USER` 37 | -------------------------------------------------------------------------------- /models/information_schema/streaming/information_schema_streaming_timeline.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_streaming_timeline 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: error_code 22 | description: "Error code returned for the requests specified by this row. NULL\ 23 | \ for\n successful requests." 24 | data_type: STRING 25 | - name: total_requests 26 | description: Total number of requests within the 1 minute interval. 27 | data_type: INTEGER 28 | - name: total_rows 29 | description: Total number of rows from all requests within the 1 minute interval. 30 | data_type: INTEGER 31 | - name: total_input_bytes 32 | description: Total number of bytes from all rows within the 1 minute interval. 
33 | data_type: INTEGER 34 | -------------------------------------------------------------------------------- /models/information_schema/streaming/information_schema_streaming_timeline_by_folder.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_streaming_timeline_by_folder 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: error_code 22 | description: "Error code returned for the requests specified by this row. NULL\ 23 | \ for\n successful requests." 24 | data_type: STRING 25 | - name: total_requests 26 | description: Total number of requests within the 1 minute interval. 27 | data_type: INTEGER 28 | - name: total_rows 29 | description: Total number of rows from all requests within the 1 minute interval. 30 | data_type: INTEGER 31 | - name: total_input_bytes 32 | description: Total number of bytes from all rows within the 1 minute interval. 33 | data_type: INTEGER 34 | -------------------------------------------------------------------------------- /models/monitoring/storage/intermediate/stg_partitions_monitoring.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_partitions_monitoring 5 | description: > 6 | A model that summarizes partition information for tables across projects and datasets. 7 | config: 8 | materialized: table 9 | columns: 10 | - name: project_id 11 | description: The project ID (TABLE_CATALOG) where the table resides. 12 | - name: dataset_id 13 | description: The dataset ID (TABLE_SCHEMA) where the table resides. 14 | - name: table_id 15 | description: The name of the table. 16 | - name: partition_type 17 | description: > 18 | The type of partitioning used for the table. Can be YEAR, MONTH, DAY, HOUR, or INTEGER, 19 | determined by the format of the partition_id. 20 | - name: earliest_partition_id 21 | description: The ID of the earliest partition for the table. 22 | - name: latest_partition_id 23 | description: The ID of the latest partition for the table. 24 | - name: partition_count 25 | description: The total number of partitions for the table. 26 | - name: sum_total_logical_bytes 27 | description: The sum of total_logical_bytes across all partitions of the table. 28 | - name: max_last_updated_time 29 | description: The most recent last_modified_time across all partitions of the table. 
30 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage_usage_timeline_by_folder.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage-usage-by-folder -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_STORAGE_USAGE_TIMELINE_BY_FOLDER view, you 4 | need the following Identity and Access Management (IAM) permissions for the parent folder of the project: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.dataViewer 10 | roles/bigquery.dataEditor 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.admin 13 | For more information about BigQuery permissions, see 14 | BigQuery IAM roles and permissions. -#} 15 | 16 | SELECT 17 | folder_numbers, 18 | project_id, 19 | table_catalog, 20 | project_number, 21 | table_schema, 22 | table_name, 23 | billable_total_logical_usage, 24 | billable_active_logical_usage, 25 | billable_long_term_logical_usage, 26 | billable_total_physical_usage, 27 | billable_active_physical_usage, 28 | billable_long_term_physical_usage 29 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_USAGE_TIMELINE_BY_FOLDER` 30 | -------------------------------------------------------------------------------- /models/information_schema/datasets/information_schema_schemata_replicas_by_failover_reservation.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-schemata-replicas-by-failover-reservation -#} 3 | {# Required role/permissions: 4 | 5 | To get the permissions that 6 | you need to query the INFORMATION_SCHEMA.SCHEMATA_REPLICAS_BY_FAILOVER_RESERVATION view, 7 | 8 | ask your administrator to grant you the 9 | 10 | 11 | 12 | 13 | BigQuery Resource Viewer (roles/bigquery.resourceViewer) 14 | IAM role on the project. 15 | 16 | 17 | 18 | 19 | 20 | 21 | For more information about granting roles, see Manage access to projects, folders, and organizations. 22 | 23 | 24 | You might also be able to get 25 | the required permissions through custom 26 | roles or other predefined 27 | roles. 
28 | -#} 29 | 30 | SELECT 31 | failover_reservation_project_id, 32 | failover_reservation_name, 33 | catalog_name, 34 | schema_name, 35 | replica_name, 36 | location, 37 | replica_primary_assigned, 38 | replica_primary_assignment_complete, 39 | creation_time, 40 | creation_complete, 41 | replication_time, 42 | sync_status 43 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA_REPLICAS_BY_FAILOVER_RESERVATION` 44 | -------------------------------------------------------------------------------- /models/information_schema/streaming/information_schema_streaming_timeline_by_organization.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_streaming_timeline_by_organization 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: error_code 22 | description: "Error code returned for the requests specified by this row. NULL\ 23 | \ for\n successful requests." 24 | data_type: STRING 25 | - name: total_requests 26 | description: Total number of requests within the 1 minute interval. 27 | data_type: INTEGER 28 | - name: total_rows 29 | description: Total number of rows from all requests within the 1 minute interval. 30 | data_type: INTEGER 31 | - name: total_input_bytes 32 | description: Total number of bytes from all rows within the 1 minute interval. 33 | data_type: INTEGER 34 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/time/compute_cost_per_hour_view.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: compute_cost_per_hour_view 5 | description: > 6 | An enriched view over the model that stores the compute cost per hour. 7 | config: 8 | meta: 9 | label: "Compute cost per hour" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: year 14 | description: The year of the compute cost. 15 | - name: month 16 | description: The month of the compute cost. 17 | - name: day 18 | description: The day of the compute cost. 19 | - name: hour 20 | description: The hour of the compute cost. 21 | - name: project_id 22 | description: The project id of the job. 23 | - name: total_query_cost 24 | description: The total cost of all queries run during the hour. 25 | - name: failing_query_cost 26 | description: The total cost of all queries that failed during the hour. 27 | - name: total_slot_ms 28 | description: The total number of slot time milliseconds used by all queries during the hour. 29 | - name: total_slot_time 30 | description: The total number of slot time in human readable format used by all queries during the hour. 31 | - name: query_count 32 | description: The total number of queries run during the hour. 
33 | -------------------------------------------------------------------------------- /models/monitoring/storage/intermediate/stg_partitions_monitoring.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table', 4 | ) 5 | }} 6 | WITH partition_expirations AS ( 7 | SELECT 8 | table_catalog, 9 | table_schema, 10 | table_name, 11 | SAFE_CAST(option_value AS INT64) AS partition_expiration_days 12 | FROM {{ ref('information_schema_table_options') }} 13 | WHERE option_name = 'partition_expiration_days' 14 | ) 15 | 16 | SELECT 17 | p.TABLE_CATALOG AS project_id, 18 | p.TABLE_SCHEMA AS dataset_id, 19 | p.table_name AS table_id, 20 | CASE 21 | WHEN REGEXP_CONTAINS(p.partition_id, r'^[0-9]{4}$') THEN 'YEAR' 22 | WHEN REGEXP_CONTAINS(p.partition_id, r'^[0-9]{6}$') THEN 'MONTH' 23 | WHEN REGEXP_CONTAINS(p.partition_id, r'^[0-9]{8}$') THEN 'DAY' 24 | WHEN REGEXP_CONTAINS(p.partition_id, r'^[0-9]{10}$') THEN 'HOUR' 25 | WHEN REGEXP_CONTAINS(p.partition_id, r'^\d+$') THEN 'INTEGER' 26 | END AS partition_type, 27 | e.partition_expiration_days, 28 | MIN(p.partition_id) AS earliest_partition_id, 29 | MAX(p.partition_id) AS latest_partition_id, 30 | COUNT(p.partition_id) AS partition_count, 31 | SUM(p.total_logical_bytes) AS sum_total_logical_bytes, 32 | MAX(p.last_modified_time) AS max_last_updated_time 33 | FROM {{ ref('information_schema_partitions') }} AS p 34 | LEFT JOIN partition_expirations AS e USING (table_catalog, table_schema, table_name) 35 | GROUP BY ALL 36 | HAVING partition_type IS NOT NULL 37 | -------------------------------------------------------------------------------- /models/information_schema/sessions/information_schema_sessions_by_user.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_sessions_by_user 4 | columns: 5 | - name: creation_time 6 | description: "(Partitioning column) Creation time of this session.\n Partitioning\ 7 | \ is based on the UTC time of this timestamp." 8 | data_type: TIMESTAMP 9 | - name: expiration_time 10 | description: "(Partitioning column) Expiration time of this session.\n \ 11 | \ Partitioning is based on the UTC time of this timestamp." 12 | data_type: TIMESTAMP 13 | - name: is_active 14 | description: "Is the session still active? TRUE if yes, otherwise\n \ 15 | \ FALSE." 16 | data_type: BOOL 17 | - name: last_modified_time 18 | description: "(Partitioning column) Time when the session was last modified.\n\ 19 | \ Partitioning is based on the UTC time of this timestamp." 20 | data_type: TIMESTAMP 21 | - name: project_id 22 | description: (Clustering column) ID of the project. 23 | data_type: STRING 24 | - name: project_number 25 | description: Number of the project. 26 | data_type: INTEGER 27 | - name: session_id 28 | description: ID of the session. For example, bquxsession_1234. 29 | data_type: STRING 30 | - name: user_email 31 | description: "(Clustering column) Email address or service account of\n \ 32 | \ the user who ran the session." 
33 | data_type: STRING 34 | -------------------------------------------------------------------------------- /models/information_schema/sessions/information_schema_sessions_by_project.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_sessions_by_project 4 | columns: 5 | - name: creation_time 6 | description: "(Partitioning column) Creation time of this session.\n Partitioning\ 7 | \ is based on the UTC time of this timestamp." 8 | data_type: TIMESTAMP 9 | - name: expiration_time 10 | description: "(Partitioning column) Expiration time of this session.\n \ 11 | \ Partitioning is based on the UTC time of this timestamp." 12 | data_type: TIMESTAMP 13 | - name: is_active 14 | description: "Is the session still active? TRUE if yes, otherwise\n \ 15 | \ FALSE." 16 | data_type: BOOL 17 | - name: last_modified_time 18 | description: "(Partitioning column) Time when the session was last modified.\n\ 19 | \ Partitioning is based on the UTC time of this timestamp." 20 | data_type: TIMESTAMP 21 | - name: project_id 22 | description: (Clustering column) ID of the project. 23 | data_type: STRING 24 | - name: project_number 25 | description: Number of the project. 26 | data_type: INTEGER 27 | - name: session_id 28 | description: ID of the session. For example, bquxsession_1234. 29 | data_type: STRING 30 | - name: user_email 31 | description: "(Clustering column) Email address or service account of\n \ 32 | \ the user who ran the session." 33 | data_type: STRING 34 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage_usage_timeline_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage-usage-by-organization -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_STORAGE_USAGE_TIMELINE_BY_ORGANIZATION view, you 4 | need the following Identity and Access Management (IAM) permissions for your organization: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.dataViewer 10 | roles/bigquery.dataEditor 11 | roles/bigquery.metadataViewer 12 | roles/bigquery.admin 13 | This schema view is only available to users with defined Google Cloud 14 | organizations. For more information about BigQuery permissions, see 15 | Access control with IAM. 
-#} 16 | 17 | SELECT 18 | project_id, 19 | table_catalog, 20 | project_number, 21 | table_schema, 22 | table_name, 23 | billable_total_logical_usage, 24 | billable_active_logical_usage, 25 | billable_long_term_logical_usage, 26 | billable_total_physical_usage, 27 | billable_active_physical_usage, 28 | billable_long_term_physical_usage 29 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_USAGE_TIMELINE_BY_ORGANIZATION` 30 | -------------------------------------------------------------------------------- /models/information_schema/jobs_timeline/information_schema_jobs_timeline_by_folder.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"], partition_by={'field': 'job_creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline-by-folder -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.JOBS_TIMELINE_BY_FOLDER view, you need 4 | the bigquery.jobs.listAll Identity and Access Management (IAM) permission for the parent 5 | folder. Each of the following predefined IAM roles includes the 6 | required permission: 7 | Folder Admin 8 | BigQuery Admin 9 | For more information about BigQuery permissions, see 10 | Access control with IAM. -#} 11 | 12 | SELECT 13 | period_start, 14 | period_slot_ms, 15 | project_id, 16 | project_number, 17 | folder_numbers, 18 | user_email, 19 | job_id, 20 | job_type, 21 | statement_type, 22 | priority, 23 | parent_job_id, 24 | job_creation_time, 25 | job_start_time, 26 | job_end_time, 27 | state, 28 | reservation_id, 29 | edition, 30 | total_bytes_billed, 31 | total_bytes_processed, 32 | error_result, 33 | cache_hit, 34 | period_shuffle_ram_usage_ratio, 35 | period_estimated_runnable_units, 36 | transaction_id 37 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_TIMELINE_BY_FOLDER` 38 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/time/compute_cost_per_minute_view.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: compute_cost_per_minute_view 5 | description: > 6 | An enriched view over the model that stores the compute cost per minute. 7 | config: 8 | meta: 9 | label: "Compute cost per minute" 10 | order_fields_by: "label" 11 | group_label: "Compute cost" 12 | columns: 13 | - name: year 14 | description: The year of the compute cost. 15 | - name: month 16 | description: The month of the compute cost. 17 | - name: day 18 | description: The day of the compute cost. 19 | - name: hour 20 | description: The hour of the compute cost. 21 | - name: minute 22 | description: The minute of the compute cost. 23 | - name: project_id 24 | description: The project id of the job. 25 | - name: total_query_cost 26 | description: The total cost of all queries run during the minute. 27 | - name: failing_query_cost 28 | description: The total cost of all queries that failed during the minute. 29 | - name: total_slot_ms 30 | description: The total number of slot time milliseconds used by all queries during the minute. 
31 | - name: total_slot_time 32 | description: The total number of slot time in human readable format used by all queries during the minute. 33 | - name: query_count 34 | description: The total number of queries run during the minute. 35 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/job_failure_analysis.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | 7 | WITH failed_jobs AS ( 8 | SELECT 9 | hour, 10 | query, 11 | j.*, 12 | j.error_result.reason AS error_reason, 13 | j.error_result.message AS error_message, 14 | j.error_result.location AS error_location 15 | FROM {{ ref('jobs_costs_incremental') }}, UNNEST(jobs) AS j 16 | WHERE j.error_result IS NOT NULL 17 | ), 18 | 19 | error_patterns AS ( 20 | SELECT 21 | error_reason, 22 | error_message, 23 | COUNT(*) AS error_count, 24 | APPROX_TOP_SUM(project_id, 1, 10) AS affected_projects, 25 | APPROX_TOP_SUM(user_email, 1, 10) AS affected_users, 26 | SUM(query_cost) AS total_failed_cost, 27 | AVG(total_slot_ms) AS avg_slot_ms, 28 | MIN(hour) AS first_occurrence, 29 | MAX(hour) AS last_occurrence 30 | FROM failed_jobs 31 | GROUP BY error_reason, error_message 32 | ) 33 | 34 | SELECT 35 | error_reason, 36 | error_message, 37 | error_count, 38 | {{ top_sum('affected_projects') }} AS top_affected_projects, 39 | {{ top_sum('affected_users') }} AS top_affected_users, 40 | ROUND(total_failed_cost, 2) AS total_failed_cost, 41 | ROUND(avg_slot_ms / 1000, 2) AS avg_slot_seconds, 42 | first_occurrence, 43 | last_occurrence, 44 | DATETIME_DIFF(last_occurrence, first_occurrence, HOUR) AS duration_hours 45 | FROM error_patterns 46 | ORDER BY error_count DESC 47 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 48 | -------------------------------------------------------------------------------- /models/information_schema/bi_engine/information_schema_bi_capacity_changes.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_bi_capacity_changes 4 | columns: 5 | - name: change_timestamp 6 | description: "Timestamp when the current update to BI Engine capacity\n \ 7 | \ was made." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: "The project ID of the project that contains BI Engine\n capacity." 11 | data_type: STRING 12 | - name: project_number 13 | description: "The project number of the project that contains\n BI Engine\ 14 | \ capacity." 15 | data_type: INTEGER 16 | - name: bi_capacity_name 17 | description: "The name of the object. Currently there can only be one capacity\ 18 | \ per\n project, hence the name is always default." 19 | data_type: STRING 20 | - name: size 21 | description: BI Engine RAM in bytes. 22 | data_type: INTEGER 23 | - name: user_email 24 | description: "Email address of the user or subject of the workforce identity\n\ 25 | \ federation that made the change. google for changes\n made by\ 26 | \ Google. NULL if the email address is unknown." 27 | data_type: STRING 28 | - name: preferred_tables 29 | description: "The set of preferred tables this BI Engine capacity must\n \ 30 | \ be used for. If set to null, BI Engine\n capacity is used\n \ 31 | \ for all queries in the current project." 
32 | data_type: REPEATED STRING 33 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/job/job_failure_analysis.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: job_failure_analysis 5 | description: > 6 | Analyzes BigQuery job failures by error patterns, affected users, and cost impact. 7 | Identifies the most common error reasons and messages to help with debugging and prevention. 8 | config: 9 | meta: 10 | label: "Job Failure Analysis" 11 | order_fields_by: "label" 12 | group_label: "Compute reliability" 13 | columns: 14 | - name: error_reason 15 | description: The error reason code from BigQuery error_result 16 | - name: error_message 17 | description: The specific error message from BigQuery error_result 18 | - name: error_count 19 | description: Number of times this error pattern occurred 20 | - name: top_affected_projects 21 | description: Projects most affected by this error pattern 22 | - name: top_affected_users 23 | description: Users most affected by this error pattern 24 | - name: total_failed_cost 25 | description: Total cost of queries that failed with this error 26 | - name: avg_slot_seconds 27 | description: Average slot seconds used by failed jobs 28 | - name: first_occurrence 29 | description: When this error pattern was first seen 30 | - name: last_occurrence 31 | description: When this error pattern was last seen 32 | - name: duration_hours 33 | description: How long this error pattern has been occurring 34 | -------------------------------------------------------------------------------- /docs/configuration/audit-logs.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5.2 3 | slug: /configuration/audit-logs 4 | --- 5 | 6 | # GCP BigQuery audit logs 7 | 8 | In this mode, the package monitors all the jobs written to a GCP BigQuery audit logs table instead of using the `INFORMATION_SCHEMA.JOBS` one. 9 | 10 | :::tip 11 | 12 | To get the best out of this mode, you should enable the `should_combine_audit_logs_and_information_schema` setting to combine both sources. 13 | More details on [the related page](/audit-logs-vs-information-schema). 14 | 15 | ::: 16 | 17 | At the time of writing, there are two versions of the audit logs table. More details in [the repository from Google](https://github.com/GoogleCloudPlatform/bigquery-utils/tree/master/views/audit). 18 | 19 | **dbt-bigquery-monitoring supports only the v2 version.** 20 | If you use v2, you will likely have a table named `cloudaudit_googleapis_com_data_access` in your audit dataset. 21 | 22 | To enable the "cloud audit logs" mode, you'll need to explicitly define the following mandatory settings in the `dbt_project.yml` file: 23 | 24 | ```yml 25 | vars: 26 | enable_gcp_bigquery_audit_logs: true 27 | gcp_bigquery_audit_logs_storage_project: 'my-gcp-project' 28 | gcp_bigquery_audit_logs_dataset: 'my_dataset' 29 | gcp_bigquery_audit_logs_table: 'cloudaudit_googleapis_com_data_access' 30 | # should_combine_audit_logs_and_information_schema: true # Optional, defaults to false but you might want to combine both sources 31 | ``` 32 | 33 | [You can use environment variables as well](/configuration/package-settings).
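For example, to vary the audit logs project per environment, the values can be read from environment variables with dbt's built-in `env_var()` function. A minimal sketch (the `DBT_AUDIT_LOGS_PROJECT` variable name is illustrative, not one defined by the package):

```yml
vars:
  enable_gcp_bigquery_audit_logs: true
  # DBT_AUDIT_LOGS_PROJECT is a hypothetical environment variable name;
  # falls back to 'my-gcp-project' when the variable is unset
  gcp_bigquery_audit_logs_storage_project: "{{ env_var('DBT_AUDIT_LOGS_PROJECT', 'my-gcp-project') }}"
```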
34 | -------------------------------------------------------------------------------- /models/information_schema/recommendations_and_insights/information_schema_recommendations_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-recommendations-by-org -#} 3 | {# Required role/permissions: To view recommendations with the 4 | INFORMATION_SCHEMA.RECOMMENDATIONS_BY_ORGANIZATION view, you must have the 5 | required permissions for the corresponding recommender. The 6 | INFORMATION_SCHEMA.RECOMMENDATIONS_BY_ORGANIZATION view only returns 7 | recommendations that you have permission to view. When you have the required 8 | permissions on the organization, you can view recommendations for all projects 9 | within that organization, regardless of your permissions on the project itself. Ask your administrator to grant access to view the recommendations. To see the 10 | required permissions for each recommender, see the following: 11 | Partition & cluster recommender permissions 12 | Materialized view recommendations permissions 13 | Role recommendations for datasets permissions 14 | -#} 15 | 16 | SELECT 17 | recommendation_id, 18 | recommender, 19 | subtype, 20 | project_id, 21 | project_number, 22 | description, 23 | last_updated_time, 24 | target_resources, 25 | state, 26 | primary_impact, 27 | priority, 28 | associated_insight_ids, 29 | additional_details 30 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`RECOMMENDATIONS_BY_ORGANIZATION` 31 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage_by_folder.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-folder -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_STORAGE_BY_FOLDER view, you need the 4 | following Identity and Access Management (IAM) permissions for the parent folder of the 5 | project: 6 | bigquery.tables.get 7 | bigquery.tables.list 8 | Each of the following predefined IAM roles includes the preceding 9 | permissions: 10 | roles/bigquery.admin 11 | roles/bigquery.dataViewer 12 | roles/bigquery.dataEditor 13 | roles/bigquery.metadataViewer 14 | For more information about BigQuery permissions, see 15 | BigQuery IAM roles and permissions. 
-#} 16 | 17 | SELECT 18 | folder_numbers, 19 | project_id, 20 | project_number, 21 | table_catalog, 22 | table_schema, 23 | table_name, 24 | creation_time, 25 | total_rows, 26 | total_partitions, 27 | total_logical_bytes, 28 | active_logical_bytes, 29 | long_term_logical_bytes, 30 | current_physical_bytes, 31 | total_physical_bytes, 32 | active_physical_bytes, 33 | long_term_physical_bytes, 34 | time_travel_physical_bytes, 35 | storage_last_modified_time, 36 | deleted, 37 | table_type, 38 | fail_safe_physical_bytes, 39 | last_metadata_index_refresh_time, 40 | table_deletion_reason, 41 | table_deletion_time 42 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_BY_FOLDER` 43 | -------------------------------------------------------------------------------- /docs/configuration/audit-logs-vs-information-schema.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5.1 3 | slug: /audit-logs-vs-information-schema 4 | --- 5 | 6 | # Audit logs vs Information schema 7 | 8 | There are two ways to monitor BigQuery jobs: 9 | 10 | - Using the BigQuery audit logs 11 | - Using the `INFORMATION_SCHEMA.JOBS` table 12 | 13 | dbt-bigquery-monitoring supports both methods and goes further by offering a configuration that combines the two sources into a unified view. 14 | See `should_combine_audit_logs_and_information_schema` in the [configuration](/configuration) if you want to combine them (a sketch follows at the end of this page). 15 | 16 | ## What's in there? 17 | 18 | Each solution has its advantages and disadvantages. Here is a comparison table to help you choose the right one for your use case: 19 | 20 | | Feature | Audit logs | INFORMATION_SCHEMA | 21 | |---------|------------|--------------------| 22 | | Max retention | User defined | 6 months | 23 | | Detailed User information | ✅ | ❌ | 24 | | BI Engine | ❌ | ✅ | 25 | | Jobs insights | ❌ | ✅ | 26 | 27 | ## Audit logs 28 | 29 | [Audit logs were introduced in 2021](https://cloud.google.com/blog/products/data-analytics/bigquery-audit-logs-pipelines-analysis) as an alternative to the `INFORMATION_SCHEMA.JOBS` table. They provide more detailed information about the user who ran the query and can have more historical data. 30 | 31 | ## Information schema 32 | 33 | The `INFORMATION_SCHEMA.JOBS` table is a system table that contains information about the jobs that have been run in BigQuery. It provides a lot of information about each job, such as BI Engine statistics and job insights.
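To combine both sources, enable the audit logs mode and the combination flag together in `dbt_project.yml`. A minimal sketch using the variables documented on the [audit logs page](/configuration/audit-logs):

```yml
vars:
  enable_gcp_bigquery_audit_logs: true
  gcp_bigquery_audit_logs_storage_project: 'my-gcp-project'
  gcp_bigquery_audit_logs_dataset: 'my_dataset'
  gcp_bigquery_audit_logs_table: 'cloudaudit_googleapis_com_data_access'
  # Combine audit logs with INFORMATION_SCHEMA-based models
  should_combine_audit_logs_and_information_schema: true
```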
34 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/dataset/dataset_with_potential_savings.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view', 4 | ) 5 | }} 6 | WITH base AS ( 7 | SELECT 8 | project_id, 9 | dataset_id, 10 | storage_billing_model, 11 | optimal_storage_billing_model, 12 | SUM(total_logical_tb) AS total_logical_tb, 13 | SUM(total_physical_tb) AS total_physical_tb, 14 | SUM(logical_cost_monthly_forecast) AS logical_cost_monthly_forecast, 15 | SUM(physical_cost_monthly_forecast) AS physical_cost_monthly_forecast, 16 | SUM(storage_pricing_model_difference) AS storage_pricing_model_difference, 17 | SUM( 18 | IF(optimal_storage_billing_model = "LOGICAL", 19 | potential_savings, 20 | NULL) 21 | ) AS logical_part_potential_savings, 22 | SUM( 23 | IF(optimal_storage_billing_model = "PHYSICAL", 24 | potential_savings, 25 | NULL) 26 | ) AS physical_part_potential_savings, 27 | SUM(potential_savings) AS maximum_potential_savings 28 | FROM {{ ref('storage_with_cost') }} 29 | WHERE 30 | table_type = "BASE TABLE" 31 | AND potential_savings > 0 32 | GROUP BY ALL 33 | ), 34 | 35 | with_optimal AS ( 36 | SELECT 37 | * EXCEPT (optimal_storage_billing_model), 38 | IF(logical_cost_monthly_forecast > physical_cost_monthly_forecast, "PHYSICAL", "LOGICAL") AS optimal_storage_billing_model, 39 | FROM base 40 | ) 41 | 42 | SELECT 43 | *, 44 | IF(storage_billing_model != optimal_storage_billing_model, storage_pricing_model_difference, NULL) AS potential_savings 45 | FROM with_optimal 46 | ORDER BY potential_savings DESC 47 | LIMIT {{ dbt_bigquery_monitoring_variable_output_limit_size() }} 48 | -------------------------------------------------------------------------------- /models/information_schema/routines/information_schema_parameters.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_parameters 4 | columns: 5 | - name: SPECIFIC_CATALOG 6 | description: "The name of the project that contains the dataset in which the\n\ 7 | \ routine containing the parameter is defined" 8 | data_type: STRING 9 | - name: SPECIFIC_SCHEMA 10 | description: "The name of the dataset that contains the routine in which the\n\ 11 | \ parameter is defined" 12 | data_type: STRING 13 | - name: SPECIFIC_NAME 14 | description: The name of the routine in which the parameter is defined 15 | data_type: STRING 16 | - name: ORDINAL_POSITION 17 | description: The 1-based position of the parameter, or 0 for the return value 18 | data_type: STRING 19 | - name: PARAMETER_MODE 20 | description: "The mode of the parameter, either IN, OUT,\n INOUT, or NULL" 21 | data_type: STRING 22 | - name: IS_RESULT 23 | description: Whether the parameter is the result of the function, either YES or 24 | NO 25 | data_type: STRING 26 | - name: PARAMETER_NAME 27 | description: The name of the parameter 28 | data_type: STRING 29 | - name: DATA_TYPE 30 | description: "The type of the parameter, will be ANY TYPE if\n defined as\ 31 | \ an any type" 32 | data_type: STRING 33 | - name: PARAMETER_DEFAULT 34 | description: "The default value of the parameter as a SQL literal value,\n \ 35 | \ always NULL" 36 | data_type: STRING 37 | - name: IS_AGGREGATE 38 | description: Whether this is an aggregate parameter, always NULL 39 | data_type: STRING 40 | 
-------------------------------------------------------------------------------- /models/information_schema/write_api/information_schema_write_api_timeline.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_write_api_timeline 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: stream_type 22 | description: "The stream type used \n for the data ingestion with BigQuery\ 23 | \ Storage Write API. It is supposed to be one of \"DEFAULT\", \"COMMITTED\"\ 24 | , \"BUFFERED\", or \"PENDING\"." 25 | data_type: STRING 26 | - name: error_code 27 | description: "Error code returned for the requests specified by this row. \"OK\"\ 28 | \ for\n successful requests." 29 | data_type: STRING 30 | - name: total_requests 31 | description: Total number of requests within the 1 minute interval. 32 | data_type: INTEGER 33 | - name: total_rows 34 | description: Total number of rows from all requests within the 1 minute interval. 35 | data_type: INTEGER 36 | - name: total_input_bytes 37 | description: Total number of bytes from all rows within the 1 minute interval. 38 | data_type: INTEGER 39 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/table/table_with_potential_savings.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: table_with_potential_savings 5 | description: > 6 | A model that identifies tables with a better pricing model under logical billing, 7 | compared to physical billing, by evaluating the difference in storage costs. 8 | config: 9 | meta: 10 | label: "Better Pricing Model (Logical)" 11 | order_fields_by: "label" 12 | group_label: "Storage cost optimization" 13 | columns: 14 | - name: project_id 15 | description: The ID of the BigQuery project. 16 | - name: dataset_id 17 | description: The ID of the dataset containing the table. 18 | - name: table_id 19 | description: The ID of the table being analyzed. 20 | - name: total_logical_tb 21 | description: The total size of the table in terabytes, as calculated by logical bytes. 22 | - name: total_physical_tb 23 | description: The total size of the table in terabytes, as calculated by physical bytes. 24 | - name: logical_cost_monthly_forecast 25 | description: The forecasted monthly storage cost for the table under the logical billing model. 26 | - name: physical_cost_monthly_forecast 27 | description: The forecasted monthly storage cost for the table under the physical billing model. 
28 | - name: optimal_storage_billing_model 29 | description: The optimal storage billing model for this table (LOGICAL or PHYSICAL) to minimize costs 30 | - name: potential_savings 31 | description: Potential savings leveraging the right storage billing model 32 | -------------------------------------------------------------------------------- /models/information_schema/jobs_timeline/information_schema_jobs_timeline_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"], partition_by={'field': 'job_creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline-by-organization -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION view, you need 4 | the bigquery.jobs.listAll Identity and Access Management (IAM) permission for the organization. 5 | Each of the following predefined IAM roles includes the required 6 | permission: 7 | BigQuery Resource Admin at the organization level 8 | Organization Owner 9 | Organization Admin 10 | The JOBS_BY_ORGANIZATION schema table is only available to users with defined 11 | Google Cloud organizations. For more information about BigQuery permissions, see 12 | Access control with IAM. -#} 13 | 14 | SELECT 15 | period_start, 16 | period_slot_ms, 17 | project_id, 18 | project_number, 19 | folder_numbers, 20 | user_email, 21 | job_id, 22 | job_type, 23 | statement_type, 24 | priority, 25 | parent_job_id, 26 | job_creation_time, 27 | job_start_time, 28 | job_end_time, 29 | state, 30 | reservation_id, 31 | edition, 32 | total_bytes_billed, 33 | total_bytes_processed, 34 | error_result, 35 | cache_hit, 36 | period_shuffle_ram_usage_ratio, 37 | period_estimated_runnable_units 38 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_TIMELINE_BY_ORGANIZATION` 39 | -------------------------------------------------------------------------------- /models/information_schema/views/information_schema_materialized_views.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-materialized-views -#} 3 | {# Required role/permissions: 4 | 5 | To get the permissions that 6 | you need to query the INFORMATION_SCHEMA.MATERIALIZED_VIEWS view, 7 | 8 | ask your administrator to grant you the 9 | 10 | 11 | 12 | 13 | BigQuery Metadata Viewer (roles/bigquery.metadataViewer) 14 | IAM role on your project or dataset. 15 | 16 | 17 | 18 | 19 | 20 | 21 | For more information about granting roles, see Manage access to projects, folders, and organizations. 22 | 23 | 24 | 25 | 26 | This predefined role contains 27 | 28 | the permissions required to query the INFORMATION_SCHEMA.MATERIALIZED_VIEWS view. 
To see the exact permissions that are 29 | required, expand the Required permissions section: 30 | 31 | 32 | 33 | Required permissions 34 | The following permissions are required to query the INFORMATION_SCHEMA.MATERIALIZED_VIEWS view: 35 | 36 | 37 | bigquery.tables.get 38 | 39 | 40 | bigquery.tables.list 41 | 42 | 43 | 44 | You might also be able to get 45 | these permissions 46 | with custom roles or 47 | other predefined roles. 48 | Access control with IAM -#} 49 | 50 | SELECT 51 | table_catalog, 52 | table_schema, 53 | table_name, 54 | last_refresh_time, 55 | refresh_watermark, 56 | last_refresh_status 57 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`MATERIALIZED_VIEWS` 58 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_constraint_column_usage.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization()) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-constraint-column-usage -#} 3 | 4 | 5 | {% set preflight_sql -%} 6 | SELECT 7 | CONCAT('`', CATALOG_NAME, '`.`', SCHEMA_NAME, '`') AS SCHEMA_NAME 8 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`SCHEMATA` 9 | {%- endset %} 10 | {% set results = run_query(preflight_sql) %} 11 | {% set dataset_list = results | map(attribute='SCHEMA_NAME') | list %} 12 | {%- if dataset_list | length == 0 -%} 13 | {{ log("No datasets found in the project list", info=False) }} 14 | {%- endif -%} 15 | 16 | WITH base AS ( 17 | {%- if dataset_list | length == 0 -%} 18 | SELECT CAST(NULL AS STRING) AS table_catalog, CAST(NULL AS STRING) AS table_schema, CAST(NULL AS STRING) AS table_name, CAST(NULL AS STRING) AS column_name, CAST(NULL AS STRING) AS constraint_catalog, CAST(NULL AS STRING) AS constraint_schema, CAST(NULL AS STRING) AS constraint_name 19 | LIMIT 0 20 | {%- else %} 21 | {% for dataset in dataset_list -%} 22 | SELECT 23 | table_catalog, 24 | table_schema, 25 | table_name, 26 | column_name, 27 | constraint_catalog, 28 | constraint_schema, 29 | constraint_name 30 | FROM {{ dataset | trim }}.`INFORMATION_SCHEMA`.`CONSTRAINT_COLUMN_USAGE` 31 | {% if not loop.last %}UNION ALL{% endif %} 32 | {% endfor %} 33 | {%- endif -%} 34 | ) 35 | 36 | SELECT 37 | table_catalog, 38 | table_schema, 39 | table_name, 40 | column_name, 41 | constraint_catalog, 42 | constraint_schema, 43 | constraint_name 44 | FROM 45 | base 46 | -------------------------------------------------------------------------------- /models/information_schema/tables/information_schema_table_storage_by_organization.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-organization"]) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization -#} 3 | {# Required role/permissions: To query the INFORMATION_SCHEMA.TABLE_STORAGE_BY_ORGANIZATION view, you need the following 4 | Identity and Access Management (IAM) permissions for your organization: 5 | bigquery.tables.get 6 | bigquery.tables.list 7 | Each of the following predefined IAM roles includes the preceding 8 | permissions: 9 | roles/bigquery.admin 10 | roles/bigquery.dataViewer 11 | 
roles/bigquery.dataEditor 12 | roles/bigquery.metadataViewer 13 | This schema view is only available to users with defined Google Cloud 14 | organizations. For more information about BigQuery permissions, see 15 | Access control with IAM. -#} 16 | 17 | SELECT 18 | project_id, 19 | project_number, 20 | table_catalog, 21 | table_schema, 22 | table_name, 23 | creation_time, 24 | total_rows, 25 | total_partitions, 26 | total_logical_bytes, 27 | active_logical_bytes, 28 | long_term_logical_bytes, 29 | current_physical_bytes, 30 | total_physical_bytes, 31 | active_physical_bytes, 32 | long_term_physical_bytes, 33 | time_travel_physical_bytes, 34 | storage_last_modified_time, 35 | deleted, 36 | table_type, 37 | fail_safe_physical_bytes, 38 | last_metadata_index_refresh_time, 39 | table_deletion_reason, 40 | table_deletion_time 41 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_BY_ORGANIZATION` 42 | -------------------------------------------------------------------------------- /models/information_schema/write_api/information_schema_write_api_timeline_by_folder.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_write_api_timeline_by_folder 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: stream_type 22 | description: "The stream type used \n for the data ingestion with BigQuery\ 23 | \ Storage Write API. It is supposed to be one of \"DEFAULT\", \"COMMITTED\"\ 24 | , \"BUFFERED\", or \"PENDING\"." 25 | data_type: STRING 26 | - name: error_code 27 | description: "Error code returned for the requests specified by this row. \"OK\"\ 28 | \ for\n successful requests." 29 | data_type: STRING 30 | - name: total_requests 31 | description: Total number of requests within the 1 minute interval. 32 | data_type: INTEGER 33 | - name: total_rows 34 | description: Total number of rows from all requests within the 1 minute interval. 35 | data_type: INTEGER 36 | - name: total_input_bytes 37 | description: Total number of bytes from all rows within the 1 minute interval. 38 | data_type: INTEGER 39 | -------------------------------------------------------------------------------- /models/information_schema/write_api/information_schema_write_api_timeline_by_organization.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_write_api_timeline_by_organization 4 | columns: 5 | - name: start_timestamp 6 | description: "(Partitioning column) Start timestamp of the 1 minute interval\n\ 7 | \ for the aggregated statistics." 8 | data_type: TIMESTAMP 9 | - name: project_id 10 | description: (Clustering column) ID of the project. 11 | data_type: STRING 12 | - name: project_number 13 | description: Number of the project. 14 | data_type: INTEGER 15 | - name: dataset_id 16 | description: (Clustering column) ID of the dataset. 
17 | data_type: STRING 18 | - name: table_id 19 | description: (Clustering column) ID of the table. 20 | data_type: STRING 21 | - name: stream_type 22 | description: "The stream type used \n for the data ingestion with BigQuery\ 23 | \ Storage Write API. It is supposed to be one of \"DEFAULT\", \"COMMITTED\"\ 24 | , \"BUFFERED\", or \"PENDING\"." 25 | data_type: STRING 26 | - name: error_code 27 | description: "Error code returned for the requests specified by this row. \"OK\"\ 28 | \ for\n successful requests." 29 | data_type: STRING 30 | - name: total_requests 31 | description: Total number of requests within the 1 minute interval. 32 | data_type: INTEGER 33 | - name: total_rows 34 | description: Total number of rows from all requests within the 1 minute interval. 35 | data_type: INTEGER 36 | - name: total_input_bytes 37 | description: Total number of bytes from all rows within the 1 minute interval. 38 | data_type: INTEGER 39 | -------------------------------------------------------------------------------- /models/information_schema/jobs/information_schema_jobs.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs -#} 3 | {# Required role/permissions: 4 | 5 | To get the permission that 6 | you need to query the INFORMATION_SCHEMA.JOBS view, 7 | 8 | ask your administrator to grant you the 9 | 10 | 11 | 12 | 13 | BigQuery Resource Viewer (roles/bigquery.resourceViewer) 14 | IAM role on your project. 15 | 16 | 17 | 18 | 19 | 20 | 21 | For more information about granting roles, see Manage access to projects, folders, and organizations. 22 | 23 | -#} 24 | 25 | SELECT 26 | bi_engine_statistics, 27 | cache_hit, 28 | creation_time, 29 | destination_table, 30 | end_time, 31 | error_result, 32 | job_id, 33 | job_stages, 34 | job_type, 35 | labels, 36 | parent_job_id, 37 | priority, 38 | project_id, 39 | project_number, 40 | query, 41 | referenced_tables, 42 | reservation_id, 43 | edition, 44 | session_info, 45 | start_time, 46 | state, 47 | statement_type, 48 | timeline, 49 | total_bytes_billed, 50 | total_bytes_processed, 51 | total_modified_partitions, 52 | total_slot_ms, 53 | transaction_id, 54 | user_email, 55 | transferred_bytes, 56 | materialized_view_statistics, 57 | metadata_cache_statistics, 58 | search_statistics, 59 | query_dialect, 60 | continuous, 61 | vector_search_statistics, 62 | continuous_query_info, 63 | job_creation_reason, 64 | query_info 65 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS` 66 | -------------------------------------------------------------------------------- /models/monitoring/storage/datamart/table/partitions_monitoring.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: partitions_monitoring 5 | description: > 6 | A model that summarizes partition information for tables across projects and datasets. 7 | config: 8 | materialized: table 9 | columns: 10 | - name: project_id 11 | description: The project ID (TABLE_CATALOG) where the table resides. 12 | - name: dataset_id 13 | description: The dataset ID (TABLE_SCHEMA) where the table resides. 14 | - name: table_id 15 | description: The name of the table. 
16 | - name: partition_type 17 | description: > 18 | The type of partitioning used for the table. Can be YEAR, MONTH, DAY, HOUR, or INTEGER, 19 | determined by the format of the partition_id. 20 | - name: earliest_partition_id 21 | description: The ID of the earliest partition for the table. 22 | - name: latest_partition_id 23 | description: The ID of the latest partition for the table. 24 | - name: partition_count 25 | description: The total number of partitions for the table. 26 | - name: sum_total_logical_bytes 27 | description: The sum of total_logical_bytes across all partitions of the table. 28 | - name: max_last_updated_time 29 | description: The most recent last_modified_time across all partitions of the table. 30 | - name: earliest_partition_time 31 | description: The time of the earliest partition for the table, or NULL if the partition is not time based. 32 | - name: latest_partition_time 33 | description: The time of the latest partition for the table, or NULL if the partition is not time based. 34 | -------------------------------------------------------------------------------- /models/monitoring/compute/datamart/bi_engine/bi_engine_materialized_view_analysis.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: bi_engine_materialized_view_analysis 5 | description: > 6 | Analyzes BI Engine usage and materialized view effectiveness across projects. 7 | Identifies performance improvements from BigQuery's acceleration features. 8 | config: 9 | meta: 10 | label: "BI Engine & Materialized View Analysis" 11 | order_fields_by: "label" 12 | group_label: "Advanced optimization" 13 | columns: 14 | - name: project_id 15 | description: BigQuery project ID 16 | - name: bi_engine_mode 17 | description: BI Engine acceleration mode used 18 | - name: bi_engine_queries 19 | description: Number of queries using BI Engine 20 | - name: bi_engine_avg_slot_seconds 21 | description: Average slot seconds for BI Engine queries 22 | - name: bi_engine_cache_hit_percentage 23 | description: Cache hit percentage for BI Engine queries 24 | - name: bi_engine_performance_tier 25 | description: Performance classification for BI Engine usage 26 | - name: bi_engine_reasons 27 | description: Reasons for BI Engine acceleration decisions 28 | - name: queries_using_materialized_views 29 | description: Number of queries leveraging materialized views 30 | - name: avg_slot_seconds_with_mv 31 | description: Average slot seconds for queries using materialized views 32 | - name: mv_tables_used 33 | description: List of materialized view tables being used 34 | - name: optimization_recommendation 35 | description: Specific recommendation for BI Engine and MV optimization 36 | -------------------------------------------------------------------------------- /models/information_schema/jobs/information_schema_jobs_by_project.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs -#} 3 | {# Required role/permissions: 4 | 5 | To get the permission that 6 | you need to query the INFORMATION_SCHEMA.JOBS view, 7 | 8 | ask your administrator to grant you the 9 | 10 | 11 | 12 | 13 | BigQuery Resource Viewer (roles/bigquery.resourceViewer) 14 | 
IAM role on your project. 15 | 16 | 17 | 18 | 19 | 20 | 21 | For more information about granting roles, see Manage access to projects, folders, and organizations. 22 | 23 | -#} 24 | 25 | SELECT 26 | bi_engine_statistics, 27 | cache_hit, 28 | creation_time, 29 | destination_table, 30 | end_time, 31 | error_result, 32 | job_id, 33 | job_stages, 34 | job_type, 35 | labels, 36 | parent_job_id, 37 | priority, 38 | project_id, 39 | project_number, 40 | query, 41 | referenced_tables, 42 | reservation_id, 43 | edition, 44 | session_info, 45 | start_time, 46 | state, 47 | statement_type, 48 | timeline, 49 | total_bytes_billed, 50 | total_bytes_processed, 51 | total_modified_partitions, 52 | total_slot_ms, 53 | transaction_id, 54 | user_email, 55 | transferred_bytes, 56 | materialized_view_statistics, 57 | metadata_cache_statistics, 58 | search_statistics, 59 | query_dialect, 60 | continuous, 61 | vector_search_statistics, 62 | continuous_query_info, 63 | job_creation_reason, 64 | query_info 65 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_BY_PROJECT` 66 | -------------------------------------------------------------------------------- /models/information_schema/jobs/information_schema_jobs_by_user.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }} 2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-by-user -#} 3 | {# Required role/permissions: 4 | 5 | To get the permission that 6 | you need to query the INFORMATION_SCHEMA.JOBS_BY_USER view, 7 | 8 | ask your administrator to grant you the 9 | 10 | 11 | 12 | 13 | BigQuery User (roles/bigquery.user) 14 | IAM role on your project. 15 | 16 | 17 | 18 | 19 | 20 | 21 | For more information about granting roles, see Manage access to projects, folders, and organizations. 
22 | 23 | -#} 24 | 25 | SELECT 26 | bi_engine_statistics, 27 | cache_hit, 28 | creation_time, 29 | destination_table, 30 | dml_statistics, 31 | end_time, 32 | error_result, 33 | job_id, 34 | job_stages, 35 | job_type, 36 | labels, 37 | parent_job_id, 38 | priority, 39 | project_id, 40 | project_number, 41 | query, 42 | referenced_tables, 43 | reservation_id, 44 | edition, 45 | session_info, 46 | start_time, 47 | state, 48 | statement_type, 49 | timeline, 50 | total_bytes_billed, 51 | total_bytes_processed, 52 | total_modified_partitions, 53 | total_slot_ms, 54 | transaction_id, 55 | user_email, 56 | transferred_bytes, 57 | materialized_view_statistics, 58 | metadata_cache_statistics, 59 | search_statistics, 60 | query_dialect, 61 | continuous, 62 | vector_search_statistics, 63 | continuous_query_info, 64 | job_creation_reason, 65 | query_info 66 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_BY_USER` 67 | -------------------------------------------------------------------------------- /models/monitoring/global/datamart/dbt_bigquery_monitoring_options.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table', 4 | ) 5 | }} 6 | 7 | {%- set option_macros = { 8 | 'bq_region': dbt_bigquery_monitoring_variable_bq_region(), 9 | 'input_gcp_projects': dbt_bigquery_monitoring_variable_input_gcp_projects(), 10 | 'use_flat_pricing': dbt_bigquery_monitoring_variable_use_flat_pricing(), 11 | 'per_billed_tb_price': dbt_bigquery_monitoring_variable_per_billed_tb_price(), 12 | 'free_tb_per_month': dbt_bigquery_monitoring_variable_free_tb_per_month(), 13 | 'hourly_slot_price': dbt_bigquery_monitoring_variable_hourly_slot_price(), 14 | 'active_logical_storage_gb_price': dbt_bigquery_monitoring_variable_active_logical_storage_gb_price(), 15 | 'long_term_logical_storage_gb_price': dbt_bigquery_monitoring_variable_long_term_logical_storage_gb_price(), 16 | 'active_physical_storage_gb_price': dbt_bigquery_monitoring_variable_active_physical_storage_gb_price(), 17 | 'long_term_physical_storage_gb_price': dbt_bigquery_monitoring_variable_long_term_physical_storage_gb_price(), 18 | 'free_storage_gb_per_month': dbt_bigquery_monitoring_variable_free_storage_gb_per_month(), 19 | 'bi_engine_gb_hourly_price': dbt_bigquery_monitoring_variable_bi_engine_gb_hourly_price(), 20 | 'lookback_window_days': dbt_bigquery_monitoring_variable_lookback_window_days(), 21 | 'output_materialization': dbt_bigquery_monitoring_variable_output_materialization(), 22 | 'output_limit_size': dbt_bigquery_monitoring_variable_output_limit_size(), 23 | } %} 24 | 25 | {% for option, macro in option_macros.items() %} 26 | SELECT 27 | "{{ option }}" AS option_label, 28 | "{{ macro }}" AS option_value 29 | {% if not loop.last %} 30 | UNION ALL 31 | {% endif %} 32 | {% endfor %} 33 | -------------------------------------------------------------------------------- /models/information_schema/reservations/information_schema_capacity_commitments.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | models: 3 | - name: information_schema_capacity_commitments 4 | columns: 5 | - name: ddl 6 | description: The DDL statement used to create this capacity commitment. 7 | data_type: STRING 8 | - name: project_id 9 | description: ID of the administration project. 10 | data_type: STRING 11 | - name: project_number 12 | description: Number of the administration project. 
--------------------------------------------------------------------------------
/models/information_schema/reservations/information_schema_capacity_commitments.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | models:
3 | - name: information_schema_capacity_commitments
4 |   columns:
5 |   - name: ddl
6 |     description: The DDL statement used to create this capacity commitment.
7 |     data_type: STRING
8 |   - name: project_id
9 |     description: ID of the administration project.
10 |     data_type: STRING
11 |   - name: project_number
12 |     description: Number of the administration project.
13 |     data_type: INTEGER
14 |   - name: capacity_commitment_id
15 |     description: ID that uniquely identifies the capacity commitment.
16 |     data_type: STRING
17 |   - name: commitment_plan
18 |     description: Commitment plan of the capacity commitment.
19 |     data_type: STRING
20 |   - name: state
21 |     description: "State the capacity commitment is in. Can be PENDING or
22 |       ACTIVE."
23 |     data_type: STRING
24 |   - name: slot_count
25 |     description: Slot count associated with the capacity commitment.
26 |     data_type: INTEGER
27 |   - name: edition
28 |     description: The edition associated with this reservation. For more information
29 |       about editions, see Introduction to BigQuery editions.
30 |     data_type: STRING
31 |   - name: is_flat_rate
32 |     description: Whether the commitment is associated with the legacy flat-rate capacity
33 |       model or an edition. If FALSE, the current commitment is associated with an
34 |       edition. If TRUE, the commitment is the legacy flat-rate capacity model.
35 |     data_type: BOOL
36 |   - name: renewal_plan
37 |     description: New commitment plan after the end of current commitment plan. You
38 |       can change the renewal plan for a commitment at any time until it expires.
39 |     data_type: STRING
40 |
--------------------------------------------------------------------------------
/models/monitoring/compute/intermediate/cost/compute_cost_per_hour.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 |   - name: compute_cost_per_hour
5 |     description: >
6 |       A model that stores the compute cost per hour.
7 |     config:
8 |       meta:
9 |         label: "Compute cost per hour"
10 |         order_fields_by: "label"
11 |         group_label: "Compute cost"
12 |     columns:
13 |       - name: hour
14 |         description: The hour of the compute cost.
15 |       - name: project_id
16 |         description: The project id of the job.
17 |       - name: reservation_id
18 |         description: The reservation id of the job.
19 |       - name: total_query_cost
20 |         description: The total cost of all queries run during the hour.
21 |       - name: failing_query_cost
22 |         description: The total cost of all queries that failed during the hour.
23 |       - name: total_bytes_processed
24 |         description: The total bytes processed during the hour.
25 |       - name: total_slot_ms
26 |         description: The total number of slot milliseconds used by all queries during the hour.
27 |       - name: total_slot_time
28 |         description: The total slot time, in human-readable format, used by all queries during the hour.
29 |       - name: query_count
30 |         description: The total number of queries run during the hour.
31 |       - name: job_state
32 |         description: A struct containing the statistics per state.
33 |         config:
34 |           meta:
35 |             fields:
36 |               - name: done
37 |                 description: Indicates if the job is done.
38 |               - name: pending
39 |                 description: Indicates if the job is pending.
40 |               - name: running
41 |                 description: Indicates if the job is running.
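Note: a typical consumer of the compute_cost_per_hour model documented above is a "most expensive hours" query. The sketch below assumes a hypothetical target dataset, `my-project.dbt_bigquery_monitoring`; the actual location depends on your dbt profile and project configuration:

-- The fully qualified table name is a placeholder, not defined by the package
SELECT
  hour,
  project_id,
  total_query_cost,
  total_slot_ms
FROM `my-project.dbt_bigquery_monitoring.compute_cost_per_hour`
WHERE hour >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
ORDER BY total_query_cost DESC
LIMIT 10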
--------------------------------------------------------------------------------
/models/monitoring/compute/intermediate/dbt/models_costs_incremental.sql:
--------------------------------------------------------------------------------
1 | {{
2 |   config(
3 |     materialized='incremental',
4 |     incremental_strategy = 'insert_overwrite',
5 |     partition_by = {
6 |       "field": "hour",
7 |       "data_type": "timestamp",
8 |       "granularity": "hour",
9 |       "copy_partitions": dbt_bigquery_monitoring_variable_use_copy_partitions()
10 |     },
11 |     cluster_by = ["dbt_model_name"],
12 |     partition_expiration_days = dbt_bigquery_monitoring_variable_lookback_window_days()
13 |   )
14 | }}
15 | SELECT
16 |   hour,
17 |   dbt_model_name,
18 |   APPROX_TOP_COUNT(project_id, 100) AS project_ids,
19 |   APPROX_TOP_COUNT(reservation_id, 100) AS reservation_ids,
20 |   APPROX_TOP_COUNT(user_email, 100) AS user_emails,
21 |   COUNTIF(cache_hit) AS cache_hit,
22 |   SUM(query_cost) AS total_query_cost,
23 |   SUM(total_slot_ms) AS total_slot_ms,
24 |   COUNT(*) AS query_count,
25 |   -- Enhanced dbt model tracking
26 |   COUNTIF(error_result IS NOT NULL) AS failed_runs,
27 |   SUM(total_bytes_processed) AS total_bytes_processed,
28 |   SUM(total_bytes_billed) AS total_bytes_billed,
29 |   AVG(total_time_seconds) AS avg_duration_seconds,
30 |   MAX(total_time_seconds) AS max_duration_seconds,
31 |   MIN(total_time_seconds) AS min_duration_seconds,
32 |   -- Model complexity indicators
33 |   AVG(total_slot_ms) AS avg_slot_ms,
34 |   -- Performance percentiles
35 |   APPROX_QUANTILES(total_time_seconds, 100)[OFFSET(50)] AS median_duration_seconds,
36 |   APPROX_QUANTILES(total_time_seconds, 100)[OFFSET(90)] AS p90_duration_seconds,
37 |   -- Resource efficiency
38 |   SUM(total_bytes_processed) / NULLIF(SUM(total_slot_ms), 0) AS bytes_per_slot_ms
39 | FROM
40 |   {{ jobs_done_incremental_hourly() }}
41 | WHERE dbt_model_name IS NOT NULL
42 | GROUP BY hour, dbt_model_name
43 |
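Note: because the incremental model above keeps one row per (hour, dbt_model_name), ranking dbt models by spend reduces to a plain aggregation on top of it. A sketch under the same placeholder-dataset assumption as before; the 7-day window is illustrative and should stay within the partition expiration, which the config ties to the package's lookback_window_days option:

-- Placeholder dataset name; adjust the window to your configured lookback
SELECT
  dbt_model_name,
  SUM(total_query_cost) AS total_cost_7d,
  SUM(failed_runs) AS failed_runs_7d,
  AVG(p90_duration_seconds) AS avg_p90_duration_s
FROM `my-project.dbt_bigquery_monitoring.models_costs_incremental`
WHERE hour >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
GROUP BY dbt_model_name
ORDER BY total_cost_7d DESC
LIMIT 20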
--------------------------------------------------------------------------------
/models/monitoring/compute/intermediate/cost/compute_cost_per_minute.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 |   - name: compute_cost_per_minute
5 |     description: >
6 |       A model that stores the compute cost per minute.
7 |     config:
8 |       meta:
9 |         label: "Compute cost per minute"
10 |         order_fields_by: "label"
11 |         group_label: "Compute cost"
12 |     columns:
13 |       - name: minute
14 |         description: The minute of the compute cost.
15 |       - name: project_id
16 |         description: The project id of the job.
17 |       - name: reservation_id
18 |         description: The reservation id of the job.
19 |       - name: total_query_cost
20 |         description: The total cost of all queries run during the minute.
21 |       - name: failing_query_cost
22 |         description: The total cost of all queries that failed during the minute.
23 |       - name: total_bytes_processed
24 |         description: The total bytes processed during the minute.
25 |       - name: total_slot_ms
26 |         description: The total number of slot milliseconds used by all queries during the minute.
27 |       - name: total_slot_time
28 |         description: The total slot time, in human-readable format, used by all queries during the minute.
29 |       - name: query_count
30 |         description: The total number of queries run during the minute.
31 |       - name: job_state
32 |         description: A struct containing the statistics per state.
33 |         config:
34 |           meta:
35 |             fields:
36 |               - name: done
37 |                 description: Indicates if the job is done.
38 |               - name: pending
39 |                 description: Indicates if the job is pending.
40 |               - name: running
41 |                 description: Indicates if the job is running.
--------------------------------------------------------------------------------
/models/information_schema/jobs/information_schema_jobs_by_folder.sql:
--------------------------------------------------------------------------------
1 | {{ config(materialized=dbt_bigquery_monitoring_materialization(), enabled=false, tags=["dbt-bigquery-monitoring-information-schema-by-folder"], partition_by={'field': 'creation_time', 'data_type': 'timestamp', 'granularity': 'hour'}, partition_expiration_days=180) }}
2 | {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs-by-folder -#}
3 | {# Required role/permissions:
4 |
5 | To get the permission that
6 | you need to query the INFORMATION_SCHEMA.JOBS_BY_FOLDER view,
7 |
8 | ask your administrator to grant you the
9 |
10 |
11 |
12 |
13 | BigQuery Resource Viewer (roles/bigquery.resourceViewer)
14 | IAM role on your parent folder.
15 |
16 |
17 |
18 |
19 |
20 |
21 | For more information about granting roles, see Manage access to projects, folders, and organizations.
22 |
23 | -#}
24 |
25 | SELECT
26 |   bi_engine_statistics,
27 |   cache_hit,
28 |   creation_time,
29 |   destination_table,
30 |   end_time,
31 |   error_result,
32 |   job_id,
33 |   job_stages,
34 |   job_type,
35 |   labels,
36 |   parent_job_id,
37 |   priority,
38 |   project_id,
39 |   project_number,
40 |   query,
41 |   referenced_tables,
42 |   reservation_id,
43 |   edition,
44 |   session_info,
45 |   start_time,
46 |   state,
47 |   statement_type,
48 |   timeline,
49 |   total_bytes_billed,
50 |   total_bytes_processed,
51 |   total_modified_partitions,
52 |   total_slot_ms,
53 |   transaction_id,
54 |   user_email,
55 |   transferred_bytes,
56 |   materialized_view_statistics,
57 |   metadata_cache_statistics,
58 |   search_statistics,
59 |   query_dialect,
60 |   continuous,
61 |   vector_search_statistics,
62 |   continuous_query_info,
63 |   job_creation_reason,
64 |   query_info
65 | FROM `region-{{ dbt_bigquery_monitoring_variable_bq_region() }}`.`INFORMATION_SCHEMA`.`JOBS_BY_FOLDER`
66 |
--------------------------------------------------------------------------------