├── .gitignore ├── .sqlfluff ├── LICENSE ├── README.md ├── dbt_project.yml ├── grafana_dashboard_template.json ├── macros ├── build_client_type.sql ├── build_egress_emails.sql ├── calc_bq_cost.sql ├── determine_data_layer.sql ├── generate_surrogate_key.sql └── test_unique_combination_of_columns.sql ├── models ├── intermediate │ ├── int_bigquery_audit_log.yml │ ├── int_importance_calculations.sql │ ├── int_table_active_users.sql │ └── int_threat_calculation.sql ├── marts │ ├── bigquery_audit_log.yml │ ├── dim_bigquery_users.sql │ ├── dim_error_messages.sql │ ├── dim_job.sql │ ├── dim_job_labels.sql │ ├── dim_job_table_view_references.sql │ ├── dim_query_statements.sql │ ├── dim_user_agents.sql │ └── fct_executed_statements.sql ├── reports │ ├── reports.yml │ ├── rpt_bigquery_dbt_metrics_daily.sql │ ├── rpt_bigquery_query_alerting.sql │ ├── rpt_bigquery_table_usage_daily.sql │ ├── rpt_bigquery_usage_cost_daily.sql │ └── rpt_bigquery_user_metrics_daily.sql ├── staging │ └── bigquery_audit_log │ │ ├── stg_bigquery_audit_log.yml │ │ └── stg_bigquery_audit_log__data_access.sql └── utils │ ├── dim_leaner_query_date.sql │ └── utils.yml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # dbt folders 2 | target/ 3 | dbt_modules/ 4 | dbt_packages/ 5 | logs/ 6 | 7 | # vscode folders 8 | .vscode/ 9 | 10 | #virtual env 11 | .venv/ 12 | 13 | #ds_Store files 14 | .DS_Store 15 | 16 | .user.yml 17 | -------------------------------------------------------------------------------- /.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | templater = dbt 3 | dialect = bigquery 4 | large_file_skip_byte_limit = 0 5 | runaway_limit = 20 6 | rules = L001, L002, L003, L004, L006, L009, L010, L011, L017, L018, L019, L022, L030, L032, L036, L039, L040, L041, L042, L046, L048, L049, L051, L060, L061, L065 7 | 8 | [sqlfluff:rules] 9 | tab_space_size = 4 10 | indent_unit = space 11 | capitalisation_policy = lower 12 | 13 | [sqlfluff:indentation] 14 | template_blocks_indent = false 15 | 16 | [sqlfluff:rules:L011] 17 | # aliasing 18 | aliasing = explicit 19 | 20 | [sqlfluff:rules:L036] 21 | wildcard_policy = single 22 | 23 | [sqlfluff:rules:L040] 24 | # Null & Boolean Literals 25 | capitalisation_policy = consistent 26 | # Comma separated list of words to ignore for this rule 27 | ignore_words = None 28 | ignore_words_regex = None 29 | 30 | [sqlfluff:rules:L042] 31 | forbid_subquery_in = both 32 | 33 | [sqlfluff:rules:L051] 34 | fully_qualify_join_types = both 35 | 36 | [tool.sqlfluff.templater.jinja] 37 | apply_dbt_builtins = true 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Grafana Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LeanerQuery dbt Package 2 | 3 | This is a dbt package built to help teams that use BigQuery understand their costs associated with dbt and general use. 4 | 5 | This package uses [BigQuery Audit](https://cloud.google.com/logging/docs/reference/audit/bigquery/rest/Shared.Types/AuditData) Log data and assumes that a log sink is set up to export the logs into tables in a BigQuery dataset. If you are unfamiliar with how to accomplish this, visit this Google cloud [resource](https://cloud.google.com/blog/products/data-analytics/bigquery-audit-logs-pipelines-analysis). 6 | 7 | This package assumes that the user(s) executing the processes have read access to the BigQuery log dataset referenced above and write access to the dataset where LeanerQuery is creating/updating objects. 8 | 9 | The package contains a lot of variable values to determine costs, aggregation, and scoring. You will want to override/specify some of these values in your `dbt_project.yml` file as your details and use cases are undoubtedly different than ours. More details in the [variables](#variables) section. 10 | 11 | This dbt package aims to provide the following details for data teams who are using BigQuery: 12 | - costs associated with queries and dbt builds 13 | - errors, with codes and messages that your users are getting from BigQuery 14 | - easy categorization and classification of your dbt models through a scoring system that assigns an **importance**, **threat**, and **overall priority** score per model. 15 | 16 | ## Quick Links 17 | - **[Getting Started](#getting-started)** 18 | - **[Models](#models)** 19 | - **[Reports](#reports)** 20 | - **[Scoring logic](#scoring-logic)** 21 | - **[Variables](#variables)** 22 | - **[Visualization](#visualization)** 23 | 24 | ## Getting Started 25 | - New for 0.2.0 and a required variable: add BQ audit log source variable to your `dbt_project.yml` file. Format of this variable is a list of fully-qualified table names, including project, dataset, and table. By default, Google/BigQuery places audit logs in your project's `bigquery_audit_logs` dataset and `cloudaudit_googleapis_com_data_access` table. You may still use only one project - please maintain the same format as below, please, but with one entry. Additional examples in the [variables](#variables) section. 26 | ```YML 27 | leaner_query_sources: ["`project_id.dataset_id.table_id`", "`project_id_2.dataset_id_2.table_id_2`"] 28 | ``` 29 | - Add any/all variable overrides to your `dbt_project.yml` file: 30 | ```YML 31 | leaner_query_importance_query_score_3: ['My BI Tool'] 32 | leaner_query_importance_query_score_4: ['My reverse ETL tool'] 33 | 34 | leaner_query_prod_dataset_names: ['marts','reports'] 35 | leaner_query_stage_dataset_names: ['staging_models'] 36 | 37 | leaner_query_custom_clients: [ 38 | {'user_agent': 'agent_string', 'principal_email':'username', 'client_name':'Custom Client 1'}, 39 | {'user_agent': 'agent_string', 'principal_email':'different_username', 'client_name':'Custom Client 2'}, 40 | ] 41 | 42 | leaner_query_custom_egress_emails: [ 43 | 'egress_sa@your-project.iam.gserviceaccount.com', 44 | 'another_sa@your-project.iam.gserviceaccount.com', 45 | ] 46 | ``` 47 | 48 | 49 | - Optional: update your dbt_project.yml file to override the dataset where the leaner_query models will be built (defaults to `leaner_query`): 50 | ```YML 51 | leaner_query: 52 | +schema: leaner_query_output 53 | ``` 54 | - Run via tag: 55 | ``` 56 | dbt run -s tag:leaner_query 57 | ``` 58 | 59 | ### Models 60 | The package outputs a dimensional model that allows users to build upon for custom analysis and reporting. This dimensional model contains: 61 | - **dim_bigquery_users**: all users who have executed a statement against BQ. Uses the `principal_email` attribute to classify users as a **user** or a **service account**. 62 | - **dim_error_messages**: all distinct error messages that have been issued by BQ. 63 | - **dim_job_labels**: bridge-type table that contains label keys and values for jobs 64 | - **dim_job_table_view_references**: combination of all tables and views ever referenced in the BQ audit logs along with which layer they are a part of. The layer classification uses the values set in the `leaner_query_prod_dataset_names` and `leaner_query_stage_dataset_names` variables. 65 | - **dim_jobs**: details about every BQ job executed, including parsed and normalized dbt metadata that is sent to BQ. 66 | - **dim_query_statements**: essentially just `query_statements` and `dbt_info` for each given BQ job. This was originally all part of `dim_jobs`, but was broken out as it's not used often and accounsted for a majority of the size of the table. 67 | - **dim_user_agents**: parsed and classified caller_supplied_user_agent details, defined as a `client_type`. The classification logic can be seen below and is augmented by values added to the `leaner_query_custom_clients` variable. 68 | - **fct_executed_statements**: BQ job execution event data including output rows, slot_ms, processed_bytes, billed_bytes, etc. 69 | #### Client type classification logic: 70 | - **Connected Sheet - User Initiated**: the job contained a label of 'sheets_trigger' with a value of 'user' 71 | - **Connected Sheet - Scheduled**: the job contained a label of 'sheets_trigger' with a value of 'schedule' 72 | - **Scheduled Query**: the job contained a label of 'data_source_id' with a value of 'scheduled_query' 73 | - **dbt run**: the job contained a label of 'dbt_invocation_id' with any value or the user_agent contains 'dbt' 74 | - **Web console**: the job's user_agent contains 'Mozilla' 75 | - **Python Client**: the job's user_agent contains 'gl-python' 76 | - **Fivetran**: the job's user_agent contains 'Fivetran' 77 | - **Hightouch**: the job's user_agent contains 'Hightouch' 78 | - **Rudderstack**: the job's user_agent contains 'gcloud-golang-bigquery' and the principal_email contains 'rudderstack' 79 | - **Golang Client**: the job's user_agent contains 'gcloud-golang' or the user_agent contains 'google-api-go' 80 | - **Node Client**: the job's user_agent contains 'gcloud-node' 81 | - **Java Client**: the job's user_agent contains 'SimbaJDBCDriver' 82 | - **Stemma Crawler**: the job's user_agent contains '(gzip),gzip(gfe)' 83 | 84 | ### Reports 85 | **Note**: Reports can be disabled by changing the `leaner_query_enable_reports` variable value to false. 86 | - **rpt_bigquery_dbt_metrics_daily**: produces aggregates, on a daily and dbt model grain, including: 87 | - total_dbt_builds 88 | - total_dbt_tests 89 | - total_dbt_snapshots 90 | - total_estimated_dbt_run_cost_usd 91 | - total_estimated_dbt_test_cost_usd 92 | - total_estimated_snapshot_cost_usd 93 | - total_estimated_dbt_build_time_ms 94 | - total_estimated_dbt_test_time_ms 95 | - total_estimated_dbt_snapshot_time_ms 96 | - average_build_cost 97 | - average_test_cost 98 | - average_snapshot_cost 99 | - average_build_time_ms 100 | - average_test_time_ms 101 | - average_snapshot_time_ms 102 | - **rpt_bigquery_table_usage_daily**: produces aggregates, on a daily, table/view, layer (raw, stage, prod), and client type grain: 103 | - total_queries_run 104 | - dbt_models_run 105 | - dbt_tests_run 106 | - total_human_users 107 | - total_service_accounts 108 | - total_errors 109 | - threat_score 110 | - importance_score 111 | - priority_score 112 | - **rpt_bigquery_usage_cost_daily**: produces cost specific aggregates, on a daily and client type grain: 113 | - total_queries_run 114 | - total_estimated_cost_usd (*includes queries and other statement types*) 115 | - total_estimated_dbt_run_build_cost_usd 116 | - total_estimated_dbt_run_test_cost_usd 117 | - total_time_ms (*includes queries and other statement types*) 118 | - total_estimated_query_cost_usd (*only includes queries*) 119 | - total_query_time_ms (*only includes queries*) 120 | - **rpt_bigquery_user_metrics_daily**: produces aggregates, on a daily, user, and user_type (service-account vs user) grain: 121 | - total_queries_run 122 | - total_errors 123 | - total_prod_tables_used 124 | - total_stage_tables_used 125 | - total_raw_tables_used 126 | - total_tables_used 127 | - total_estimated_cost_usd 128 | - total_time_ms 129 | - error_rate (total_errors/total_queries) 130 | - prod_table_use_rate (total_prod_tables_used/total_tables_used) 131 | - stage_table_use_rate (total_stage_tables_used/total_tables_used) 132 | - raw_table_use_rate (total_raw_tables_used/total_tables_used) 133 | ### Scoring logic 134 | #### Importance scoring 135 | We sought to evaluate how important each BQ object was by looking at them from four separate dimensions, which combine to total possible score of 100: 136 | - **Service account usage** - if tables are being accessed (in query statements) by service accounts, the object is important enough to be part of an automated process. We determine a 7-day total query count percentile rank (per table) and use the weight from the `leaner_query_weight_importance__service_account_queries` to calculate a service account usage component. 137 | - **dbt usage** - if tables are being queried in our dbt build process, the object holds some level of importance because it is an obvious dependency for other models/tables. We determine a 7-day toal query count percentile rank (per table) and use the weight from the `leaner_query_weight_importance__dbt_queries` to calculate a dbt usage component. 138 | - **Egress usage** - tables that are used by egress processes are obviously important because we are likely impacting other systems within our organization. We further classify egress in this score on a scale of 1-4 (least to most important) with the use of the values in the `leaner_query_importance_query_score_1...4` variables. We then determine a 7-day query count percentile rank (per table) and use the weight from the `leaner_query_weight_importance__egress_use` to calculate an egress usage component. 139 | - **User breadth** - if tables are being accessed (in query statements) by a wide array of users (non-service account users), the object is important in ad hoc query and discovery processes. We use a trailing 30 day active user metric (by table) and determine a 7-day query count percentile rank (by table) and use the weight from the `leaner_query_weight_importance__user_breadth` to calculate a user breadth component. 140 | #### Threat scoring 141 | We sought to evaluate how much of a threat or risk each BQ object was by looking at them from four separate dimensions, which combine to total possible score of 100: 142 | - **Egress usage** - if tables are being used (by service accounts) to send data to other systems, it represents a possible threat or risk to our business. In addition, we assert that egress use of tables that are in a non production data layer (determined by the `leaner_query_prod_dataset_names` variable) is very risky and is assessed a multiplier value (5 * the 7 day total query count). We determine a 7-day total query count percentile rank (with any multiplier applied, per table) and use the weight from the `leaner_query_weight_threat__service_account_egress` to calculate an egress component. 143 | - **Cost to query** - tables that are expensive to query are a threat or a risk to the business from a cost perspective and should be modeled and/or tuned to be more efficient. We determine a 7-day total query cost percentile rank (per table) and use the weight from the `leaner_query_weight_threat__cost_to_query` to calculate a cost to query component. 144 | - **Cost to build** - tables that are expensive to build (by dbt processes), again, are a threat or a risk because they could be unnecessarily costing our business money. We determine a 7-day total cost to build percentile rank (per table) and use the weight from the `leaner_query_weight_threat__cost_to_build` to calculate a cost to build component. 145 | - **Daily errors** - if tables are being used incorrectly and/or are often the source of errors, they represent a risk to our reputation and overall usefulness to the organization. We determine a 7-day total error percentile rank (per table) and use the weight from the `leaner_query_weight_threat__daily_errors` to calculate a daily error component. 146 | #### Priority Score 147 | Priorizing where to spend precious refactoring and refinement time is difficult and rarely data-backed. To help prioritize these efforts, we combine the table importance score and the threat level score to develop an overall priority score. The calculation is as follows: 148 | ``` 149 | (importance score * importance_level_weight) + (threat score * threat_level_weight) 150 | ``` 151 | ## Variables 152 | ### General purpose 153 | - **leaner_query_sources** 154 | - **Description**: fully-scoped table name of where in BigQuery the audit logs you wish to query are stored. This can either be a single project or multiple, depending on your needs. Format must be a list, with fully-qualified tables quoted and back-ticked. 155 | - **Default**: none: you must enter a value for LeanerQuery to function. 156 | - **Example**: 157 | - Single project: ["\`example-corp-bq.bigquery_audit_logs.cloudaudit_googleapis_com_data_access\`"] 158 | - Multiple projects: ["\`example-corp-bq.bigquery_audit_logs.cloudaudit_googleapis_com_data_access\`", "\`example-corp-bq-two.bigquery_audit_logs.cloudaudit_googleapis_com_data_access\`", "\`example-corp-bq-three.bigquery_audit_logs.cloudaudit_googleapis_com_data_access\`"] 159 | 160 | - **leaner_query_enable_reports** 161 | - **Description**: enable report models listed above. 162 | - **Default**: true 163 | - **leaner_query_require_partition_by_reports** 164 | - **Description**: enable requiring the use of partitions when querying report tables. 165 | - **Default**: true 166 | - **leaner_query_prod_dataset_names** 167 | - **Description**: a list of dataset names that are considered production (e.g. marts and reporting tables), meant for consumption by users and other systems. 168 | - **Default**: [] (None) 169 | - **leaner_query_stage_dataset_names** 170 | - **Description**: a list of dataset names that are considered staging and are not meant for consumption by users and other systems; used by dbt to build production models. 171 | - **Default**: [] (None) 172 | - **leaner_query_bq_on_demand_pricing** 173 | - **Description**: list price for BQ on-demand pricing per TB bytes billed. You can adjust this if your contract has a differnt rate than the list price. 174 | - **Default**: 6.225 175 | - **leaner_query_bq_slot_pricing** 176 | - **Description**: list price for BQ slot based pricing per ms, rounded up to the minute. You can adjust this if your contract has a differnt rate than the list price. 177 | - **Default**: 178 | ```json 179 | {"standard": 0.04, "enterprise": 0.06, "enterprise_plus": 0.10} 180 | ``` 181 | - **leaner_query_bq_pricing_schedule** 182 | - **Description**: your current pricing schedule. Should be one of ('on_demand', 'standard', 'enterprise', 'enterprise_plus'). 183 | - **Default**: on_demand 184 | - **leaner_query_custom_clients** 185 | - **Description**: a list of custom clients that extends the standard client list above. 186 | - **Default**:[] (None) 187 | ```json 188 | { 189 | "user_agent": "sample_user_agent_value", 190 | "principal_email": "optional_address", 191 | "client_name": "sample_custom_client_name" 192 | } 193 | ``` 194 | - **leaner_query_enable_dev_limits** 195 | - **Description**: This is used to limit the incremental builds in a dev environment so you aren't doing a full refresh during development and CI. 196 | - **Default**: true 197 | - **leaner_query_dev_limit_days** 198 | - **Description**: Used in conjunction with `leaner_query_enable_dev_limits`, this determines how many days back you want to have your incremental models build in dev. 199 | - **Default**: 30 200 | - **leaner_query_dev_target_name** 201 | - **Description**: The name of your target dev environment. 202 | - **Default**: "dev" 203 | ### Reporting 204 | - **leaner_query_priority_threat_level_weight** 205 | - **Description**: weight given to the threat report score 206 | - **Default**: 0.65 207 | - **leaner_query_priority_importance_level_weight** 208 | - **Description**: weight given to the threat report score 209 | - **Default**: 0.35 210 | - **Threat report**: 211 | - **leaner_query_weight_threat__service_account_egress** 212 | - **Description**: weight given to service account egress activity for an object 213 | - **Default**: 0.35 214 | - **leaner_query_weight_threat__cost_to_query** 215 | - **Description**: weight given to the cost to query an object 216 | - **Default**: 0.30 217 | - **leaner_query_weight_threat__cost_to_build** 218 | - **Description**: weight given to the cost to build an object (with dbt) 219 | - **Default**: 0.25 220 | - **leaner_query_weight_threat__daily_errors** 221 | - **Description**: weight given to the volume of daily errors associated with an object 222 | - **Default**: 0.10 223 | - **Importance report**: 224 | - - **leaner_query_importance_query_score_1** 225 | - **Description**: list of client names to use to score the lowest importance queries 226 | - **Default**: ["Web console"] 227 | - - **leaner_query_importance_query_score_2** 228 | - **Description**: list of client names to use to score the second lowest importance queries 229 | - **Default**: ["Connected Sheet - User Initiated", "Connected Sheet - Scheduled", "Scheduled Query"] 230 | - - **leaner_query_importance_query_score_3** 231 | - **Description**: list of client names to use to score the second highest importance queries 232 | - **Default**: [] (None) 233 | - - **leaner_query_importance_query_score_4** 234 | - **Description**: list of client names to use to score the highest importance queries 235 | - **Default**: [] (None) 236 | - **leaner_query_weight_importance__service_account_queries** 237 | - **Description**: weight given to the volume of queries by service accounts for an object 238 | - **Default**: 0.20 239 | - **leaner_query_weight_importance__dbt_queries** 240 | - **Description**: weight given to the cost to build an object with dbt 241 | - **Default**: 0.20 242 | - **leaner_query_weight_importance__egress_use** 243 | - **Description**: weight given to volume of queries used by service accounts that perform egress for an object 244 | - **Default**: 0.35 245 | - **leaner_query_weight_importance__user_breadth** 246 | - **Description**: weight given to the breadth of users querying an object 247 | - **Default**: 0.25 248 | 249 | ### Visualization 250 | #### Grafana Dashboarding Template 251 | We have included a template for our Grafana Dashboard that help to track our BQ and dbt costs. You can find the raw JSON in `grafana_dashboard_template.json`. You can easily import this dashboard into any Grafana instance and immediately start visualizing your data - assuming you have the BigQuery Datasource configured. -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'leaner_query' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | profile: 'leaner_query' 6 | 7 | require-dbt-version: [">=1.3.0", "<2.0.0"] 8 | 9 | target-path: "target" 10 | clean-targets: 11 | - "target" 12 | - "dbt_packages" 13 | 14 | models: 15 | leaner_query: 16 | +schema: leaner_query 17 | +tags: 18 | - leaner_query 19 | 20 | marts: 21 | materialized: table 22 | 23 | reports: 24 | materialized: table 25 | 26 | staging: 27 | materialized: view 28 | 29 | intermediate: 30 | +materialized: table 31 | 32 | utils: 33 | materialized: ephemeral 34 | 35 | 36 | vars: 37 | leaner_query_priority_threat_level_weight: .65 38 | leaner_query_priority_importance_level_weight: .35 39 | leaner_query_weight_threat__service_account_egress: 0.35 40 | leaner_query_weight_threat__cost_to_query: 0.30 41 | leaner_query_weight_threat__cost_to_build: 0.25 42 | leaner_query_weight_threat__daily_errors: 0.10 43 | leaner_query_weight_importance__service_account_queries: 0.2 44 | leaner_query_weight_importance__dbt_queries: 0.2 45 | leaner_query_weight_importance__egress_use: 0.35 46 | leaner_query_weight_importance__user_breadth: 0.25 47 | 48 | leaner_query_require_partition_by_reports: true 49 | leaner_query_enable_reports: true 50 | leaner_query_enable_dev_limits: true 51 | 52 | leaner_query_dev_limit_days: 30 53 | leaner_query_dev_target_name: ['dev'] 54 | 55 | #Client types 56 | leaner_query_importance_query_score_1: ['Web console'] 57 | leaner_query_importance_query_score_2: ['Connected Sheet - User Initiated', 'Connected Sheet - Scheduled', 'Scheduled Query'] 58 | leaner_query_importance_query_score_3: [] 59 | leaner_query_importance_query_score_4: [] 60 | 61 | leaner_query_prod_dataset_names: [] 62 | leaner_query_stage_dataset_names: [] 63 | 64 | leaner_query_custom_clients: [] 65 | 66 | # principal emails used for egress 67 | leaner_query_custom_egress_emails: [] 68 | 69 | leaner_query_bq_on_demand_pricing: 6.225 70 | leaner_query_bq_slot_pricing: {'standard': 0.04, 'enterprise': 0.06, 'enterprise_plus' : 0.10} 71 | leaner_query_bq_pricing_schedule: on_demand 72 | -------------------------------------------------------------------------------- /grafana_dashboard_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_BIGQUERY", 5 | "label": "biguery", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "grafana-bigquery-datasource", 9 | "pluginName": "Google BigQuery" 10 | } 11 | ], 12 | "__elements": {}, 13 | "__requires": [ 14 | { 15 | "type": "panel", 16 | "id": "barchart", 17 | "name": "Bar chart", 18 | "version": "" 19 | }, 20 | { 21 | "type": "grafana", 22 | "id": "grafana", 23 | "name": "Grafana", 24 | "version": "10.4.0-65610" 25 | }, 26 | { 27 | "type": "datasource", 28 | "id": "grafana-bigquery-datasource", 29 | "name": "Google BigQuery", 30 | "version": "1.2.6" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "stat", 35 | "name": "Stat", 36 | "version": "" 37 | }, 38 | { 39 | "type": "panel", 40 | "id": "table", 41 | "name": "Table", 42 | "version": "" 43 | }, 44 | { 45 | "type": "panel", 46 | "id": "timeseries", 47 | "name": "Time series", 48 | "version": "" 49 | } 50 | ], 51 | "annotations": { 52 | "list": [ 53 | { 54 | "builtIn": 1, 55 | "datasource": { 56 | "type": "grafana", 57 | "uid": "-- Grafana --" 58 | }, 59 | "enable": true, 60 | "hide": true, 61 | "iconColor": "rgba(0, 211, 255, 1)", 62 | "name": "Annotations & Alerts", 63 | "target": { 64 | "limit": 100, 65 | "matchAny": false, 66 | "tags": [], 67 | "type": "dashboard" 68 | }, 69 | "type": "dashboard" 70 | } 71 | ] 72 | }, 73 | "editable": true, 74 | "fiscalYearStartMonth": 0, 75 | "graphTooltip": 0, 76 | "id": null, 77 | "links": [], 78 | "liveNow": false, 79 | "panels": [ 80 | { 81 | "collapsed": false, 82 | "gridPos": { 83 | "h": 1, 84 | "w": 24, 85 | "x": 0, 86 | "y": 0 87 | }, 88 | "id": 23, 89 | "panels": [], 90 | "title": "Costs", 91 | "type": "row" 92 | }, 93 | { 94 | "datasource": { 95 | "type": "grafana-bigquery-datasource", 96 | "uid": "${DS_BIGQUERY}" 97 | }, 98 | "description": "", 99 | "fieldConfig": { 100 | "defaults": { 101 | "color": { 102 | "mode": "thresholds" 103 | }, 104 | "mappings": [], 105 | "thresholds": { 106 | "mode": "absolute", 107 | "steps": [ 108 | { 109 | "color": "green", 110 | "value": null 111 | } 112 | ] 113 | }, 114 | "unit": "currencyUSD", 115 | "unitScale": true 116 | }, 117 | "overrides": [] 118 | }, 119 | "gridPos": { 120 | "h": 7, 121 | "w": 4, 122 | "x": 0, 123 | "y": 1 124 | }, 125 | "id": 15, 126 | "options": { 127 | "colorMode": "value", 128 | "graphMode": "area", 129 | "justifyMode": "auto", 130 | "orientation": "auto", 131 | "reduceOptions": { 132 | "calcs": [ 133 | "lastNotNull" 134 | ], 135 | "fields": "", 136 | "values": false 137 | }, 138 | "showPercentChange": false, 139 | "textMode": "auto", 140 | "wideLayout": true 141 | }, 142 | "pluginVersion": "10.4.0-65610", 143 | "targets": [ 144 | { 145 | "datasource": { 146 | "type": "grafana-bigquery-datasource", 147 | "uid": "${DS_BIGQUERY}" 148 | }, 149 | "editorMode": "code", 150 | "format": 1, 151 | "location": "US", 152 | "project": "", 153 | "rawQuery": true, 154 | "rawSql": "Select sum(total_estimated_cost_usd) as total_cost\nfrom `leaner_query.rpt_bigquery_usage_cost_daily`\nwhere report_date = current_date() - 1", 155 | "refId": "A", 156 | "sql": { 157 | "columns": [ 158 | { 159 | "parameters": [], 160 | "type": "function" 161 | } 162 | ], 163 | "groupBy": [ 164 | { 165 | "property": { 166 | "type": "string" 167 | }, 168 | "type": "groupBy" 169 | } 170 | ], 171 | "limit": 50 172 | } 173 | } 174 | ], 175 | "title": "Estimated Total Cost (Yesterday)", 176 | "type": "stat" 177 | }, 178 | { 179 | "datasource": { 180 | "type": "grafana-bigquery-datasource", 181 | "uid": "${DS_BIGQUERY}" 182 | }, 183 | "description": "", 184 | "fieldConfig": { 185 | "defaults": { 186 | "color": { 187 | "mode": "thresholds" 188 | }, 189 | "decimals": 0, 190 | "mappings": [], 191 | "thresholds": { 192 | "mode": "absolute", 193 | "steps": [ 194 | { 195 | "color": "green", 196 | "value": null 197 | } 198 | ] 199 | }, 200 | "unit": "none", 201 | "unitScale": true 202 | }, 203 | "overrides": [] 204 | }, 205 | "gridPos": { 206 | "h": 7, 207 | "w": 4, 208 | "x": 4, 209 | "y": 1 210 | }, 211 | "id": 16, 212 | "options": { 213 | "colorMode": "value", 214 | "graphMode": "area", 215 | "justifyMode": "auto", 216 | "orientation": "auto", 217 | "reduceOptions": { 218 | "calcs": [ 219 | "lastNotNull" 220 | ], 221 | "fields": "", 222 | "values": false 223 | }, 224 | "showPercentChange": false, 225 | "textMode": "auto", 226 | "wideLayout": true 227 | }, 228 | "pluginVersion": "10.4.0-65610", 229 | "targets": [ 230 | { 231 | "datasource": { 232 | "type": "grafana-bigquery-datasource", 233 | "uid": "${DS_BIGQUERY}" 234 | }, 235 | "editorMode": "code", 236 | "format": 1, 237 | "location": "US", 238 | "project": "", 239 | "rawQuery": true, 240 | "rawSql": "Select sum(total_queries_run) as total_query_count\nfrom `leaner_query.rpt_bigquery_usage_cost_daily`\nwhere report_date = current_date() - 1", 241 | "refId": "A", 242 | "sql": { 243 | "columns": [ 244 | { 245 | "parameters": [], 246 | "type": "function" 247 | } 248 | ], 249 | "groupBy": [ 250 | { 251 | "property": { 252 | "type": "string" 253 | }, 254 | "type": "groupBy" 255 | } 256 | ], 257 | "limit": 50 258 | } 259 | } 260 | ], 261 | "title": "Total Queries Run Yesterday", 262 | "type": "stat" 263 | }, 264 | { 265 | "datasource": { 266 | "type": "grafana-bigquery-datasource", 267 | "uid": "${DS_BIGQUERY}" 268 | }, 269 | "description": "Cost changes (7 day and 30 day) for clients that have accumulated at least $1 in query costs over the past day.", 270 | "fieldConfig": { 271 | "defaults": { 272 | "color": { 273 | "mode": "thresholds" 274 | }, 275 | "custom": { 276 | "align": "auto", 277 | "cellOptions": { 278 | "type": "auto" 279 | }, 280 | "inspect": false 281 | }, 282 | "mappings": [], 283 | "thresholds": { 284 | "mode": "absolute", 285 | "steps": [ 286 | { 287 | "color": "green", 288 | "value": null 289 | } 290 | ] 291 | }, 292 | "unit": "none", 293 | "unitScale": true 294 | }, 295 | "overrides": [ 296 | { 297 | "matcher": { 298 | "id": "byName", 299 | "options": "total_estimated_cost_usd" 300 | }, 301 | "properties": [ 302 | { 303 | "id": "unit", 304 | "value": "currencyUSD" 305 | } 306 | ] 307 | }, 308 | { 309 | "matcher": { 310 | "id": "byName", 311 | "options": "thirty_day_avg" 312 | }, 313 | "properties": [ 314 | { 315 | "id": "unit", 316 | "value": "currencyUSD" 317 | } 318 | ] 319 | }, 320 | { 321 | "matcher": { 322 | "id": "byName", 323 | "options": "seven_day_avg" 324 | }, 325 | "properties": [ 326 | { 327 | "id": "unit", 328 | "value": "currencyUSD" 329 | } 330 | ] 331 | }, 332 | { 333 | "matcher": { 334 | "id": "byName", 335 | "options": "seven_day_change" 336 | }, 337 | "properties": [ 338 | { 339 | "id": "unit", 340 | "value": "percentunit" 341 | } 342 | ] 343 | }, 344 | { 345 | "matcher": { 346 | "id": "byName", 347 | "options": "thirty_day_change" 348 | }, 349 | "properties": [ 350 | { 351 | "id": "unit", 352 | "value": "percentunit" 353 | }, 354 | { 355 | "id": "custom.cellOptions", 356 | "value": { 357 | "mode": "lcd", 358 | "type": "gauge", 359 | "valueDisplayMode": "color" 360 | } 361 | }, 362 | { 363 | "id": "thresholds", 364 | "value": { 365 | "mode": "absolute", 366 | "steps": [ 367 | { 368 | "color": "green", 369 | "value": null 370 | }, 371 | { 372 | "color": "red", 373 | "value": 1e-9 374 | } 375 | ] 376 | } 377 | } 378 | ] 379 | }, 380 | { 381 | "matcher": { 382 | "id": "byName", 383 | "options": "seven_day_change" 384 | }, 385 | "properties": [ 386 | { 387 | "id": "custom.cellOptions", 388 | "value": { 389 | "mode": "lcd", 390 | "type": "gauge", 391 | "valueDisplayMode": "color" 392 | } 393 | }, 394 | { 395 | "id": "thresholds", 396 | "value": { 397 | "mode": "absolute", 398 | "steps": [ 399 | { 400 | "color": "green", 401 | "value": null 402 | }, 403 | { 404 | "color": "red", 405 | "value": 1e-8 406 | } 407 | ] 408 | } 409 | } 410 | ] 411 | } 412 | ] 413 | }, 414 | "gridPos": { 415 | "h": 7, 416 | "w": 16, 417 | "x": 8, 418 | "y": 1 419 | }, 420 | "id": 17, 421 | "options": { 422 | "cellHeight": "sm", 423 | "footer": { 424 | "countRows": false, 425 | "fields": "", 426 | "reducer": [ 427 | "sum" 428 | ], 429 | "show": false 430 | }, 431 | "showHeader": true, 432 | "sortBy": [ 433 | { 434 | "desc": true, 435 | "displayName": "Total Est Cost" 436 | } 437 | ] 438 | }, 439 | "pluginVersion": "10.4.0-65610", 440 | "targets": [ 441 | { 442 | "datasource": { 443 | "type": "grafana-bigquery-datasource", 444 | "uid": "${DS_BIGQUERY}" 445 | }, 446 | "editorMode": "code", 447 | "format": 1, 448 | "location": "US", 449 | "project": "", 450 | "rawQuery": true, 451 | "rawSql": "with aggs as(\n select\n report_date,\n client_type,\n total_estimated_cost_usd,\n avg(total_estimated_cost_usd) over (partition by client_type order by report_date rows between 6 preceding and current row) as seven_day_avg,\n avg(total_estimated_cost_usd) over (partition by client_type order by report_date rows between 29 preceding and current row) as thirty_day_avg\n from `leaner_query.rpt_bigquery_usage_cost_daily`\n where report_date >= current_date - 31\n)\nselect\n client_type,\n aggs.total_estimated_cost_usd,\n thirty_day_avg,\n seven_day_avg,\n safe_divide((total_estimated_cost_usd - seven_day_avg), seven_day_avg) as seven_day_change,\n safe_divide((total_estimated_cost_usd - thirty_day_avg), thirty_day_avg) as thirty_day_change\nfrom aggs\nwhere report_date = current_date - 1\nand aggs.total_estimated_cost_usd > 1.0\norder by 2 desc", 452 | "refId": "A", 453 | "sql": { 454 | "columns": [ 455 | { 456 | "parameters": [], 457 | "type": "function" 458 | } 459 | ], 460 | "groupBy": [ 461 | { 462 | "property": { 463 | "type": "string" 464 | }, 465 | "type": "groupBy" 466 | } 467 | ], 468 | "limit": 50 469 | } 470 | } 471 | ], 472 | "title": "Cost Trends by Client Type", 473 | "transformations": [ 474 | { 475 | "id": "organize", 476 | "options": { 477 | "excludeByName": {}, 478 | "indexByName": { 479 | "client_type": 0, 480 | "seven_day_avg": 2, 481 | "seven_day_change": 4, 482 | "thirty_day_avg": 3, 483 | "thirty_day_change": 5, 484 | "total_estimated_cost_usd": 1 485 | }, 486 | "renameByName": { 487 | "client_type": "Client Type", 488 | "seven_day_avg": "Seven Day Avg Cost", 489 | "seven_day_change": "Seven Day Cost Change", 490 | "thirty_day_avg": "Thirty Day Avg Cost", 491 | "thirty_day_change": "Thirty Day Cost Change", 492 | "total_estimated_cost_usd": "Total Est Cost" 493 | } 494 | } 495 | } 496 | ], 497 | "type": "table" 498 | }, 499 | { 500 | "datasource": { 501 | "type": "grafana-bigquery-datasource", 502 | "uid": "${DS_BIGQUERY}" 503 | }, 504 | "fieldConfig": { 505 | "defaults": { 506 | "color": { 507 | "mode": "palette-classic" 508 | }, 509 | "custom": { 510 | "axisBorderShow": false, 511 | "axisCenteredZero": false, 512 | "axisColorMode": "text", 513 | "axisLabel": "", 514 | "axisPlacement": "auto", 515 | "barAlignment": 0, 516 | "drawStyle": "line", 517 | "fillOpacity": 0, 518 | "gradientMode": "none", 519 | "hideFrom": { 520 | "legend": false, 521 | "tooltip": false, 522 | "viz": false 523 | }, 524 | "insertNulls": false, 525 | "lineInterpolation": "linear", 526 | "lineStyle": { 527 | "fill": "solid" 528 | }, 529 | "lineWidth": 1, 530 | "pointSize": 5, 531 | "scaleDistribution": { 532 | "type": "linear" 533 | }, 534 | "showPoints": "never", 535 | "spanNulls": true, 536 | "stacking": { 537 | "group": "A", 538 | "mode": "none" 539 | }, 540 | "thresholdsStyle": { 541 | "mode": "off" 542 | } 543 | }, 544 | "decimals": 2, 545 | "mappings": [], 546 | "thresholds": { 547 | "mode": "absolute", 548 | "steps": [ 549 | { 550 | "color": "green", 551 | "value": null 552 | }, 553 | { 554 | "color": "red", 555 | "value": 80 556 | } 557 | ] 558 | }, 559 | "unit": "currencyUSD", 560 | "unitScale": true 561 | }, 562 | "overrides": [] 563 | }, 564 | "gridPos": { 565 | "h": 12, 566 | "w": 24, 567 | "x": 0, 568 | "y": 8 569 | }, 570 | "id": 3, 571 | "options": { 572 | "legend": { 573 | "calcs": [], 574 | "displayMode": "list", 575 | "placement": "bottom", 576 | "showLegend": true 577 | }, 578 | "tooltip": { 579 | "mode": "single", 580 | "sort": "none" 581 | } 582 | }, 583 | "pluginVersion": "9.4.4-2911eec", 584 | "targets": [ 585 | { 586 | "datasource": { 587 | "type": "grafana-bigquery-datasource", 588 | "uid": "${DS_BIGQUERY}" 589 | }, 590 | "editorMode": "code", 591 | "format": 1, 592 | "location": "US", 593 | "project": "", 594 | "rawQuery": true, 595 | "rawSql": "with total_costs as(\n select\n timestamp(report_date) as report_date\n , sum(total_estimated_cost_usd) as sum_cost\nfrom `leaner_query.rpt_bigquery_user_metrics_daily`\nwhere username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n -- and report_date != current_date\ngroup by 1\n)\nSelect *,\nCase \n when Lag(report_date, 7) over(order by report_date) is null then null\n else avg(sum_cost) over(order by report_date rows between 6 preceding and current row)\nEnd as rolling_avg \nfrom total_costs\nORDER BY 2\n", 596 | "refId": "A", 597 | "sql": { 598 | "columns": [ 599 | { 600 | "parameters": [], 601 | "type": "function" 602 | } 603 | ], 604 | "groupBy": [ 605 | { 606 | "property": { 607 | "type": "string" 608 | }, 609 | "type": "groupBy" 610 | } 611 | ], 612 | "limit": 50 613 | } 614 | } 615 | ], 616 | "title": "Aggregated Costs", 617 | "transformations": [ 618 | { 619 | "id": "convertFieldType", 620 | "options": { 621 | "conversions": [ 622 | { 623 | "destinationType": "time", 624 | "targetField": "report_date\\username" 625 | } 626 | ], 627 | "fields": {} 628 | } 629 | }, 630 | { 631 | "id": "organize", 632 | "options": { 633 | "excludeByName": {}, 634 | "includeByName": {}, 635 | "indexByName": {}, 636 | "renameByName": { 637 | "report_date": "Report Date", 638 | "rolling_avg": "Rolling Avg", 639 | "sum_cost": "Cost" 640 | } 641 | } 642 | } 643 | ], 644 | "type": "timeseries" 645 | }, 646 | { 647 | "datasource": { 648 | "type": "grafana-bigquery-datasource", 649 | "uid": "${DS_BIGQUERY}" 650 | }, 651 | "fieldConfig": { 652 | "defaults": { 653 | "color": { 654 | "mode": "palette-classic" 655 | }, 656 | "custom": { 657 | "axisBorderShow": false, 658 | "axisCenteredZero": false, 659 | "axisColorMode": "text", 660 | "axisLabel": "", 661 | "axisPlacement": "auto", 662 | "barAlignment": 0, 663 | "drawStyle": "line", 664 | "fillOpacity": 0, 665 | "gradientMode": "none", 666 | "hideFrom": { 667 | "legend": false, 668 | "tooltip": false, 669 | "viz": false 670 | }, 671 | "insertNulls": false, 672 | "lineInterpolation": "linear", 673 | "lineStyle": { 674 | "fill": "solid" 675 | }, 676 | "lineWidth": 1, 677 | "pointSize": 5, 678 | "scaleDistribution": { 679 | "type": "linear" 680 | }, 681 | "showPoints": "never", 682 | "spanNulls": true, 683 | "stacking": { 684 | "group": "A", 685 | "mode": "none" 686 | }, 687 | "thresholdsStyle": { 688 | "mode": "off" 689 | } 690 | }, 691 | "decimals": 2, 692 | "mappings": [], 693 | "thresholds": { 694 | "mode": "absolute", 695 | "steps": [ 696 | { 697 | "color": "green" 698 | }, 699 | { 700 | "color": "red", 701 | "value": 80 702 | } 703 | ] 704 | }, 705 | "unit": "currencyUSD", 706 | "unitScale": true 707 | }, 708 | "overrides": [] 709 | }, 710 | "gridPos": { 711 | "h": 13, 712 | "w": 24, 713 | "x": 0, 714 | "y": 20 715 | }, 716 | "id": 18, 717 | "options": { 718 | "legend": { 719 | "calcs": [], 720 | "displayMode": "table", 721 | "placement": "bottom", 722 | "showLegend": false 723 | }, 724 | "tooltip": { 725 | "mode": "single", 726 | "sort": "none" 727 | } 728 | }, 729 | "pluginVersion": "9.4.4-2911eec", 730 | "targets": [ 731 | { 732 | "datasource": { 733 | "type": "grafana-bigquery-datasource", 734 | "uid": "${DS_BIGQUERY}" 735 | }, 736 | "editorMode": "code", 737 | "format": 1, 738 | "location": "US", 739 | "project": "", 740 | "rawQuery": true, 741 | "rawSql": "select\n timestamp(report_date) as report_date\n , user_type\n , username\n , sum(total_estimated_cost_usd) as sum_cost\nfrom `leaner_query.rpt_bigquery_user_metrics_daily`\nwhere username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n -- and report_date != current_date\ngroup by 1,2,3\norder by 3", 742 | "refId": "A", 743 | "sql": { 744 | "columns": [ 745 | { 746 | "parameters": [], 747 | "type": "function" 748 | } 749 | ], 750 | "groupBy": [ 751 | { 752 | "property": { 753 | "type": "string" 754 | }, 755 | "type": "groupBy" 756 | } 757 | ], 758 | "limit": 50 759 | } 760 | } 761 | ], 762 | "title": "Individual Users and Costs", 763 | "transformations": [ 764 | { 765 | "id": "groupingToMatrix", 766 | "options": { 767 | "columnField": "username", 768 | "rowField": "report_date", 769 | "valueField": "sum_cost" 770 | } 771 | }, 772 | { 773 | "id": "convertFieldType", 774 | "options": { 775 | "conversions": [ 776 | { 777 | "destinationType": "time", 778 | "targetField": "report_date\\username" 779 | } 780 | ], 781 | "fields": {} 782 | } 783 | } 784 | ], 785 | "type": "timeseries" 786 | }, 787 | { 788 | "datasource": { 789 | "type": "grafana-bigquery-datasource", 790 | "uid": "${DS_BIGQUERY}" 791 | }, 792 | "fieldConfig": { 793 | "defaults": { 794 | "color": { 795 | "mode": "palette-classic" 796 | }, 797 | "custom": { 798 | "axisBorderShow": false, 799 | "axisCenteredZero": false, 800 | "axisColorMode": "text", 801 | "axisLabel": "", 802 | "axisPlacement": "auto", 803 | "barAlignment": 0, 804 | "drawStyle": "line", 805 | "fillOpacity": 0, 806 | "gradientMode": "none", 807 | "hideFrom": { 808 | "legend": false, 809 | "tooltip": false, 810 | "viz": false 811 | }, 812 | "insertNulls": false, 813 | "lineInterpolation": "linear", 814 | "lineStyle": { 815 | "fill": "solid" 816 | }, 817 | "lineWidth": 1, 818 | "pointSize": 5, 819 | "scaleDistribution": { 820 | "type": "linear" 821 | }, 822 | "showPoints": "never", 823 | "spanNulls": true, 824 | "stacking": { 825 | "group": "A", 826 | "mode": "none" 827 | }, 828 | "thresholdsStyle": { 829 | "mode": "off" 830 | } 831 | }, 832 | "decimals": 2, 833 | "mappings": [], 834 | "thresholds": { 835 | "mode": "absolute", 836 | "steps": [ 837 | { 838 | "color": "green" 839 | }, 840 | { 841 | "color": "red", 842 | "value": 80 843 | } 844 | ] 845 | }, 846 | "unit": "currencyUSD", 847 | "unitScale": true 848 | }, 849 | "overrides": [] 850 | }, 851 | "gridPos": { 852 | "h": 13, 853 | "w": 24, 854 | "x": 0, 855 | "y": 33 856 | }, 857 | "id": 19, 858 | "options": { 859 | "legend": { 860 | "calcs": [ 861 | "first", 862 | "last", 863 | "diff", 864 | "diffperc" 865 | ], 866 | "displayMode": "table", 867 | "placement": "right", 868 | "showLegend": true, 869 | "sortBy": "Last", 870 | "sortDesc": true, 871 | "width": 300 872 | }, 873 | "tooltip": { 874 | "mode": "single", 875 | "sort": "none" 876 | } 877 | }, 878 | "pluginVersion": "9.4.4-2911eec", 879 | "targets": [ 880 | { 881 | "datasource": { 882 | "type": "grafana-bigquery-datasource", 883 | "uid": "${DS_BIGQUERY}" 884 | }, 885 | "editorMode": "code", 886 | "format": 1, 887 | "location": "US", 888 | "project": "", 889 | "rawQuery": true, 890 | "rawSql": "select\n timestamp(report_date) as report_date\n , client_type\n , total_estimated_cost_usd as sum_cost\nfrom `leaner_query.rpt_bigquery_usage_cost_daily`\nwhere $__timeFilter(timestamp(report_date)) \n-- group by 1,2\norder by 1", 891 | "refId": "A", 892 | "sql": { 893 | "columns": [ 894 | { 895 | "parameters": [], 896 | "type": "function" 897 | } 898 | ], 899 | "groupBy": [ 900 | { 901 | "property": { 902 | "type": "string" 903 | }, 904 | "type": "groupBy" 905 | } 906 | ], 907 | "limit": 50 908 | } 909 | } 910 | ], 911 | "title": "Costs by Client Type", 912 | "transformations": [ 913 | { 914 | "id": "groupingToMatrix", 915 | "options": { 916 | "columnField": "client_type", 917 | "rowField": "report_date", 918 | "valueField": "sum_cost" 919 | } 920 | }, 921 | { 922 | "id": "convertFieldType", 923 | "options": { 924 | "conversions": [ 925 | { 926 | "destinationType": "time", 927 | "targetField": "report_date\\client_type" 928 | } 929 | ], 930 | "fields": {} 931 | } 932 | } 933 | ], 934 | "type": "timeseries" 935 | }, 936 | { 937 | "datasource": { 938 | "type": "grafana-bigquery-datasource", 939 | "uid": "${DS_BIGQUERY}" 940 | }, 941 | "description": "Daily costs associated with dbt runs, tests, and snapshots.", 942 | "fieldConfig": { 943 | "defaults": { 944 | "color": { 945 | "mode": "palette-classic" 946 | }, 947 | "custom": { 948 | "axisBorderShow": false, 949 | "axisCenteredZero": false, 950 | "axisColorMode": "text", 951 | "axisLabel": "", 952 | "axisPlacement": "auto", 953 | "barAlignment": 0, 954 | "drawStyle": "line", 955 | "fillOpacity": 0, 956 | "gradientMode": "none", 957 | "hideFrom": { 958 | "legend": false, 959 | "tooltip": false, 960 | "viz": false 961 | }, 962 | "insertNulls": false, 963 | "lineInterpolation": "linear", 964 | "lineWidth": 1, 965 | "pointSize": 2, 966 | "scaleDistribution": { 967 | "type": "linear" 968 | }, 969 | "showPoints": "auto", 970 | "spanNulls": false, 971 | "stacking": { 972 | "group": "A", 973 | "mode": "none" 974 | }, 975 | "thresholdsStyle": { 976 | "mode": "off" 977 | } 978 | }, 979 | "mappings": [], 980 | "thresholds": { 981 | "mode": "absolute", 982 | "steps": [ 983 | { 984 | "color": "green" 985 | }, 986 | { 987 | "color": "red", 988 | "value": 80 989 | } 990 | ] 991 | }, 992 | "unit": "currencyUSD", 993 | "unitScale": true 994 | }, 995 | "overrides": [] 996 | }, 997 | "gridPos": { 998 | "h": 10, 999 | "w": 24, 1000 | "x": 0, 1001 | "y": 46 1002 | }, 1003 | "id": 14, 1004 | "options": { 1005 | "legend": { 1006 | "calcs": [], 1007 | "displayMode": "list", 1008 | "placement": "bottom", 1009 | "showLegend": true 1010 | }, 1011 | "tooltip": { 1012 | "mode": "single", 1013 | "sort": "none" 1014 | } 1015 | }, 1016 | "targets": [ 1017 | { 1018 | "datasource": { 1019 | "type": "grafana-bigquery-datasource", 1020 | "uid": "${DS_BIGQUERY}" 1021 | }, 1022 | "editorMode": "code", 1023 | "format": 1, 1024 | "location": "US", 1025 | "project": "", 1026 | "rawQuery": true, 1027 | "rawSql": "select\n timestamp(report_date) as report_date,\n round(sum(total_estimated_dbt_run_cost_usd), 2) as daily_build_cost,\n round(sum(total_estimated_dbt_test_cost_usd), 2) as daily_test_cost,\n round(sum(total_estimated_dbt_snapshot_cost_usd), 2) as daily_snapshot_cost\nfrom `leaner_query.rpt_bigquery_dbt_metrics_daily`\nwhere $__timeFilter(timestamp(report_date)) \ngroup by 1\n\n", 1028 | "refId": "A", 1029 | "sql": { 1030 | "columns": [ 1031 | { 1032 | "parameters": [], 1033 | "type": "function" 1034 | } 1035 | ], 1036 | "groupBy": [ 1037 | { 1038 | "property": { 1039 | "type": "string" 1040 | }, 1041 | "type": "groupBy" 1042 | } 1043 | ], 1044 | "limit": 50 1045 | } 1046 | } 1047 | ], 1048 | "title": "Dbt Costs", 1049 | "transformations": [ 1050 | { 1051 | "id": "organize", 1052 | "options": { 1053 | "excludeByName": {}, 1054 | "indexByName": {}, 1055 | "renameByName": { 1056 | "daily_build_cost": "Daily Build Cost", 1057 | "daily_snapshot_cost": "Daily Snapshot Cost", 1058 | "daily_test_cost": "Daily Test Cost" 1059 | } 1060 | } 1061 | } 1062 | ], 1063 | "type": "timeseries" 1064 | }, 1065 | { 1066 | "datasource": { 1067 | "type": "grafana-bigquery-datasource", 1068 | "uid": "${DS_BIGQUERY}" 1069 | }, 1070 | "description": "", 1071 | "fieldConfig": { 1072 | "defaults": { 1073 | "color": { 1074 | "mode": "thresholds" 1075 | }, 1076 | "custom": { 1077 | "align": "auto", 1078 | "cellOptions": { 1079 | "type": "auto" 1080 | }, 1081 | "inspect": false, 1082 | "minWidth": 80 1083 | }, 1084 | "mappings": [], 1085 | "thresholds": { 1086 | "mode": "absolute", 1087 | "steps": [ 1088 | { 1089 | "color": "green" 1090 | }, 1091 | { 1092 | "color": "red", 1093 | "value": 80 1094 | } 1095 | ] 1096 | }, 1097 | "unit": "currencyUSD", 1098 | "unitScale": true 1099 | }, 1100 | "overrides": [ 1101 | { 1102 | "matcher": { 1103 | "id": "byName", 1104 | "options": "table_name" 1105 | }, 1106 | "properties": [ 1107 | { 1108 | "id": "displayName", 1109 | "value": "Table Name" 1110 | } 1111 | ] 1112 | }, 1113 | { 1114 | "matcher": { 1115 | "id": "byName", 1116 | "options": "priority_score" 1117 | }, 1118 | "properties": [ 1119 | { 1120 | "id": "displayName", 1121 | "value": "Priority Score" 1122 | } 1123 | ] 1124 | }, 1125 | { 1126 | "matcher": { 1127 | "id": "byName", 1128 | "options": "total_dbt_builds" 1129 | }, 1130 | "properties": [ 1131 | { 1132 | "id": "unit" 1133 | } 1134 | ] 1135 | } 1136 | ] 1137 | }, 1138 | "gridPos": { 1139 | "h": 8, 1140 | "w": 12, 1141 | "x": 0, 1142 | "y": 56 1143 | }, 1144 | "id": 10, 1145 | "options": { 1146 | "cellHeight": "sm", 1147 | "footer": { 1148 | "countRows": false, 1149 | "fields": "", 1150 | "reducer": [ 1151 | "sum" 1152 | ], 1153 | "show": false 1154 | }, 1155 | "showHeader": true 1156 | }, 1157 | "pluginVersion": "10.4.0-65610", 1158 | "targets": [ 1159 | { 1160 | "datasource": { 1161 | "type": "grafana-bigquery-datasource", 1162 | "uid": "${DS_BIGQUERY}" 1163 | }, 1164 | "editorMode": "code", 1165 | "format": 1, 1166 | "location": "US", 1167 | "project": "", 1168 | "rawQuery": true, 1169 | "rawSql": "select distinct\n dbt_model_name,\n total_estimated_dbt_run_cost_usd,\n average_build_cost,\n total_dbt_builds\nfrom `leaner_query.rpt_bigquery_dbt_metrics_daily`\nwhere report_date = current_date - 1\norder by 2 desc\nlimit 25", 1170 | "refId": "A", 1171 | "sql": { 1172 | "columns": [ 1173 | { 1174 | "parameters": [], 1175 | "type": "function" 1176 | } 1177 | ], 1178 | "groupBy": [ 1179 | { 1180 | "property": { 1181 | "type": "string" 1182 | }, 1183 | "type": "groupBy" 1184 | } 1185 | ], 1186 | "limit": 50 1187 | } 1188 | } 1189 | ], 1190 | "title": "Top 25 Expensive DBT Runs", 1191 | "transformations": [ 1192 | { 1193 | "id": "organize", 1194 | "options": { 1195 | "excludeByName": {}, 1196 | "indexByName": {}, 1197 | "renameByName": { 1198 | "average_build_cost": "Avg Build Cost", 1199 | "dbt_model_name": "Model", 1200 | "importance_score": "Importance Score", 1201 | "total_dbt_builds": "Total Builds", 1202 | "total_estimated_dbt_run_build_cost_usd": "Estimated Cost", 1203 | "total_estimated_dbt_run_cost_usd": "Total Build Cost" 1204 | } 1205 | } 1206 | } 1207 | ], 1208 | "type": "table" 1209 | }, 1210 | { 1211 | "datasource": { 1212 | "type": "grafana-bigquery-datasource", 1213 | "uid": "${DS_BIGQUERY}" 1214 | }, 1215 | "description": "", 1216 | "fieldConfig": { 1217 | "defaults": { 1218 | "color": { 1219 | "mode": "thresholds" 1220 | }, 1221 | "custom": { 1222 | "align": "auto", 1223 | "cellOptions": { 1224 | "type": "auto" 1225 | }, 1226 | "inspect": false 1227 | }, 1228 | "mappings": [], 1229 | "thresholds": { 1230 | "mode": "absolute", 1231 | "steps": [ 1232 | { 1233 | "color": "green" 1234 | }, 1235 | { 1236 | "color": "red", 1237 | "value": 80 1238 | } 1239 | ] 1240 | }, 1241 | "unit": "currencyUSD", 1242 | "unitScale": true 1243 | }, 1244 | "overrides": [ 1245 | { 1246 | "matcher": { 1247 | "id": "byName", 1248 | "options": "table_name" 1249 | }, 1250 | "properties": [ 1251 | { 1252 | "id": "displayName", 1253 | "value": "Table Name" 1254 | } 1255 | ] 1256 | }, 1257 | { 1258 | "matcher": { 1259 | "id": "byName", 1260 | "options": "priority_score" 1261 | }, 1262 | "properties": [ 1263 | { 1264 | "id": "displayName", 1265 | "value": "Priority Score" 1266 | } 1267 | ] 1268 | }, 1269 | { 1270 | "matcher": { 1271 | "id": "byName", 1272 | "options": "total_dbt_tests" 1273 | }, 1274 | "properties": [ 1275 | { 1276 | "id": "unit" 1277 | } 1278 | ] 1279 | } 1280 | ] 1281 | }, 1282 | "gridPos": { 1283 | "h": 8, 1284 | "w": 12, 1285 | "x": 12, 1286 | "y": 56 1287 | }, 1288 | "id": 11, 1289 | "options": { 1290 | "cellHeight": "sm", 1291 | "footer": { 1292 | "countRows": false, 1293 | "fields": "", 1294 | "reducer": [ 1295 | "sum" 1296 | ], 1297 | "show": false 1298 | }, 1299 | "showHeader": true 1300 | }, 1301 | "pluginVersion": "10.4.0-65610", 1302 | "targets": [ 1303 | { 1304 | "datasource": { 1305 | "type": "grafana-bigquery-datasource", 1306 | "uid": "${DS_BIGQUERY}" 1307 | }, 1308 | "editorMode": "code", 1309 | "format": 1, 1310 | "location": "US", 1311 | "project": "", 1312 | "rawQuery": true, 1313 | "rawSql": "select distinct\n dbt_model_name,\n total_estimated_dbt_test_cost_usd,\n average_test_cost,\n total_dbt_tests\nfrom `leaner_query.rpt_bigquery_dbt_metrics_daily`\nwhere report_date = current_date - 1\norder by 2 desc\nlimit 25", 1314 | "refId": "A", 1315 | "sql": { 1316 | "columns": [ 1317 | { 1318 | "parameters": [], 1319 | "type": "function" 1320 | } 1321 | ], 1322 | "groupBy": [ 1323 | { 1324 | "property": { 1325 | "type": "string" 1326 | }, 1327 | "type": "groupBy" 1328 | } 1329 | ], 1330 | "limit": 50 1331 | } 1332 | } 1333 | ], 1334 | "title": "Top 25 Expensive DBT Tests", 1335 | "transformations": [ 1336 | { 1337 | "id": "organize", 1338 | "options": { 1339 | "excludeByName": {}, 1340 | "indexByName": {}, 1341 | "renameByName": { 1342 | "average_test_cost": "Avg Test Cost", 1343 | "dbt_model_name": "Test", 1344 | "importance_score": "Importance Score", 1345 | "total_dbt_tests": "Total Tests", 1346 | "total_estimated_dbt_test_build_cost_usd": "Estimated Cost", 1347 | "total_estimated_dbt_test_cost_usd": "Total Test Cost" 1348 | } 1349 | } 1350 | } 1351 | ], 1352 | "type": "table" 1353 | }, 1354 | { 1355 | "datasource": { 1356 | "type": "grafana-bigquery-datasource", 1357 | "uid": "${DS_BIGQUERY}" 1358 | }, 1359 | "description": "", 1360 | "fieldConfig": { 1361 | "defaults": { 1362 | "color": { 1363 | "mode": "thresholds" 1364 | }, 1365 | "custom": { 1366 | "align": "auto", 1367 | "cellOptions": { 1368 | "type": "auto" 1369 | }, 1370 | "inspect": false 1371 | }, 1372 | "mappings": [], 1373 | "thresholds": { 1374 | "mode": "absolute", 1375 | "steps": [ 1376 | { 1377 | "color": "green" 1378 | }, 1379 | { 1380 | "color": "red", 1381 | "value": 80 1382 | } 1383 | ] 1384 | }, 1385 | "unit": "currencyUSD", 1386 | "unitScale": true 1387 | }, 1388 | "overrides": [ 1389 | { 1390 | "matcher": { 1391 | "id": "byName", 1392 | "options": "table_name" 1393 | }, 1394 | "properties": [ 1395 | { 1396 | "id": "displayName", 1397 | "value": "Table Name" 1398 | } 1399 | ] 1400 | }, 1401 | { 1402 | "matcher": { 1403 | "id": "byName", 1404 | "options": "priority_score" 1405 | }, 1406 | "properties": [ 1407 | { 1408 | "id": "displayName", 1409 | "value": "Priority Score" 1410 | } 1411 | ] 1412 | }, 1413 | { 1414 | "matcher": { 1415 | "id": "byName", 1416 | "options": "total_dbt_snapshots" 1417 | }, 1418 | "properties": [ 1419 | { 1420 | "id": "unit" 1421 | } 1422 | ] 1423 | }, 1424 | { 1425 | "matcher": { 1426 | "id": "byName", 1427 | "options": "Snapshot" 1428 | }, 1429 | "properties": [ 1430 | { 1431 | "id": "custom.width", 1432 | "value": 449 1433 | } 1434 | ] 1435 | } 1436 | ] 1437 | }, 1438 | "gridPos": { 1439 | "h": 8, 1440 | "w": 12, 1441 | "x": 0, 1442 | "y": 64 1443 | }, 1444 | "id": 12, 1445 | "options": { 1446 | "cellHeight": "sm", 1447 | "footer": { 1448 | "countRows": false, 1449 | "fields": "", 1450 | "reducer": [ 1451 | "sum" 1452 | ], 1453 | "show": false 1454 | }, 1455 | "showHeader": true, 1456 | "sortBy": [] 1457 | }, 1458 | "pluginVersion": "10.4.0-65610", 1459 | "targets": [ 1460 | { 1461 | "datasource": { 1462 | "type": "grafana-bigquery-datasource", 1463 | "uid": "${DS_BIGQUERY}" 1464 | }, 1465 | "editorMode": "code", 1466 | "format": 1, 1467 | "location": "US", 1468 | "project": "", 1469 | "rawQuery": true, 1470 | "rawSql": "select distinct\n dbt_model_name,\n total_estimated_dbt_snapshot_cost_usd,\n average_snapshot_cost,\n total_dbt_snapshots\nfrom `leaner_query.rpt_bigquery_dbt_metrics_daily`\nwhere report_date = current_date - 1\norder by 2 desc\nlimit 25", 1471 | "refId": "A", 1472 | "sql": { 1473 | "columns": [ 1474 | { 1475 | "parameters": [], 1476 | "type": "function" 1477 | } 1478 | ], 1479 | "groupBy": [ 1480 | { 1481 | "property": { 1482 | "type": "string" 1483 | }, 1484 | "type": "groupBy" 1485 | } 1486 | ], 1487 | "limit": 50 1488 | } 1489 | } 1490 | ], 1491 | "title": "Top 25 Expensive DBT Snapshots", 1492 | "transformations": [ 1493 | { 1494 | "id": "organize", 1495 | "options": { 1496 | "excludeByName": {}, 1497 | "indexByName": {}, 1498 | "renameByName": { 1499 | "average_snapshot_cost": "Avg Snapshot Cost", 1500 | "dbt_model_name": "Snapshot", 1501 | "importance_score": "Importance Score", 1502 | "total_dbt_snapshots": "Total Snapshots", 1503 | "total_estimated_dbt_snapshot_build_cost_usd": "Estimated Cost", 1504 | "total_estimated_dbt_snapshot_cost_usd": "Total Snapshot Cost" 1505 | } 1506 | } 1507 | } 1508 | ], 1509 | "type": "table" 1510 | }, 1511 | { 1512 | "collapsed": false, 1513 | "gridPos": { 1514 | "h": 1, 1515 | "w": 24, 1516 | "x": 0, 1517 | "y": 72 1518 | }, 1519 | "id": 24, 1520 | "panels": [], 1521 | "title": "Usage Stats", 1522 | "type": "row" 1523 | }, 1524 | { 1525 | "datasource": { 1526 | "type": "grafana-bigquery-datasource", 1527 | "uid": "${DS_BIGQUERY}" 1528 | }, 1529 | "description": "Tracks the number of BigQuery errors for each user on a given day.", 1530 | "fieldConfig": { 1531 | "defaults": { 1532 | "color": { 1533 | "mode": "palette-classic" 1534 | }, 1535 | "custom": { 1536 | "axisBorderShow": false, 1537 | "axisCenteredZero": false, 1538 | "axisColorMode": "text", 1539 | "axisLabel": "", 1540 | "axisPlacement": "auto", 1541 | "barAlignment": 0, 1542 | "drawStyle": "line", 1543 | "fillOpacity": 0, 1544 | "gradientMode": "none", 1545 | "hideFrom": { 1546 | "legend": false, 1547 | "tooltip": false, 1548 | "viz": false 1549 | }, 1550 | "insertNulls": false, 1551 | "lineInterpolation": "linear", 1552 | "lineStyle": { 1553 | "fill": "solid" 1554 | }, 1555 | "lineWidth": 1, 1556 | "pointSize": 5, 1557 | "scaleDistribution": { 1558 | "type": "linear" 1559 | }, 1560 | "showPoints": "never", 1561 | "spanNulls": true, 1562 | "stacking": { 1563 | "group": "A", 1564 | "mode": "none" 1565 | }, 1566 | "thresholdsStyle": { 1567 | "mode": "off" 1568 | } 1569 | }, 1570 | "decimals": 0, 1571 | "mappings": [], 1572 | "thresholds": { 1573 | "mode": "absolute", 1574 | "steps": [ 1575 | { 1576 | "color": "green" 1577 | }, 1578 | { 1579 | "color": "red", 1580 | "value": 80 1581 | } 1582 | ] 1583 | }, 1584 | "unit": "none", 1585 | "unitScale": true 1586 | }, 1587 | "overrides": [] 1588 | }, 1589 | "gridPos": { 1590 | "h": 13, 1591 | "w": 24, 1592 | "x": 0, 1593 | "y": 73 1594 | }, 1595 | "id": 9, 1596 | "options": { 1597 | "legend": { 1598 | "calcs": [], 1599 | "displayMode": "table", 1600 | "placement": "bottom", 1601 | "showLegend": false 1602 | }, 1603 | "tooltip": { 1604 | "mode": "single", 1605 | "sort": "none" 1606 | } 1607 | }, 1608 | "pluginVersion": "9.4.4-2911eec", 1609 | "targets": [ 1610 | { 1611 | "datasource": { 1612 | "type": "grafana-bigquery-datasource", 1613 | "uid": "${DS_BIGQUERY}" 1614 | }, 1615 | "editorMode": "code", 1616 | "format": 1, 1617 | "location": "US", 1618 | "project": "", 1619 | "rawQuery": true, 1620 | "rawSql": "select\n timestamp(report_date) as report_date\n , user_type\n , username\n , total_errors\nfrom `leaner_query.rpt_bigquery_user_metrics_daily`\nwhere username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n -- and report_date != current_date\n-- group by 1,2,3\norder by 3", 1621 | "refId": "A", 1622 | "sql": { 1623 | "columns": [ 1624 | { 1625 | "parameters": [], 1626 | "type": "function" 1627 | } 1628 | ], 1629 | "groupBy": [ 1630 | { 1631 | "property": { 1632 | "type": "string" 1633 | }, 1634 | "type": "groupBy" 1635 | } 1636 | ], 1637 | "limit": 50 1638 | } 1639 | } 1640 | ], 1641 | "title": "Individual Users and Errors", 1642 | "transformations": [ 1643 | { 1644 | "id": "groupingToMatrix", 1645 | "options": { 1646 | "columnField": "username", 1647 | "rowField": "report_date", 1648 | "valueField": "total_errors" 1649 | } 1650 | }, 1651 | { 1652 | "id": "convertFieldType", 1653 | "options": { 1654 | "conversions": [ 1655 | { 1656 | "destinationType": "time", 1657 | "targetField": "report_date\\username" 1658 | } 1659 | ], 1660 | "fields": {} 1661 | } 1662 | } 1663 | ], 1664 | "type": "timeseries" 1665 | }, 1666 | { 1667 | "datasource": { 1668 | "type": "grafana-bigquery-datasource", 1669 | "uid": "${DS_BIGQUERY}" 1670 | }, 1671 | "description": "Tracks the percent total of BigQuery errors for each user on a given day.", 1672 | "fieldConfig": { 1673 | "defaults": { 1674 | "color": { 1675 | "mode": "palette-classic" 1676 | }, 1677 | "custom": { 1678 | "axisBorderShow": false, 1679 | "axisCenteredZero": false, 1680 | "axisColorMode": "text", 1681 | "axisLabel": "", 1682 | "axisPlacement": "auto", 1683 | "axisSoftMax": 1, 1684 | "barAlignment": 0, 1685 | "drawStyle": "line", 1686 | "fillOpacity": 0, 1687 | "gradientMode": "none", 1688 | "hideFrom": { 1689 | "legend": false, 1690 | "tooltip": false, 1691 | "viz": false 1692 | }, 1693 | "insertNulls": false, 1694 | "lineInterpolation": "linear", 1695 | "lineStyle": { 1696 | "fill": "solid" 1697 | }, 1698 | "lineWidth": 1, 1699 | "pointSize": 5, 1700 | "scaleDistribution": { 1701 | "type": "linear" 1702 | }, 1703 | "showPoints": "never", 1704 | "spanNulls": true, 1705 | "stacking": { 1706 | "group": "A", 1707 | "mode": "none" 1708 | }, 1709 | "thresholdsStyle": { 1710 | "mode": "off" 1711 | } 1712 | }, 1713 | "decimals": 0, 1714 | "mappings": [], 1715 | "thresholds": { 1716 | "mode": "absolute", 1717 | "steps": [ 1718 | { 1719 | "color": "green" 1720 | }, 1721 | { 1722 | "color": "red", 1723 | "value": 80 1724 | } 1725 | ] 1726 | }, 1727 | "unit": "percentunit", 1728 | "unitScale": true 1729 | }, 1730 | "overrides": [] 1731 | }, 1732 | "gridPos": { 1733 | "h": 13, 1734 | "w": 24, 1735 | "x": 0, 1736 | "y": 86 1737 | }, 1738 | "id": 22, 1739 | "options": { 1740 | "legend": { 1741 | "calcs": [], 1742 | "displayMode": "table", 1743 | "placement": "bottom", 1744 | "showLegend": true 1745 | }, 1746 | "tooltip": { 1747 | "mode": "single", 1748 | "sort": "none" 1749 | } 1750 | }, 1751 | "pluginVersion": "9.4.4-2911eec", 1752 | "targets": [ 1753 | { 1754 | "datasource": { 1755 | "type": "grafana-bigquery-datasource", 1756 | "uid": "${DS_BIGQUERY}" 1757 | }, 1758 | "editorMode": "code", 1759 | "format": 1, 1760 | "location": "US", 1761 | "project": "", 1762 | "rawQuery": true, 1763 | "rawSql": "with base as (\n select\n timestamp(report_date) as report_date\n , user_type\n , username\n , total_errors\n from `leaner_query.rpt_bigquery_user_metrics_daily`\n where username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n),\n\ntotal_errors as (\n select\n report_date,\n sum(total_errors) as num_total_errors\n from base\n group by 1\n)\n\nselect\n base.report_date,\n base.user_type,\n base.username,\n round(base.total_errors / total_errors.num_total_errors, 2) as perc_total_errors\nfrom base\nleft join total_errors\n on base.report_date = total_errors.report_date\norder by 4 desc\n", 1764 | "refId": "A", 1765 | "sql": { 1766 | "columns": [ 1767 | { 1768 | "parameters": [], 1769 | "type": "function" 1770 | } 1771 | ], 1772 | "groupBy": [ 1773 | { 1774 | "property": { 1775 | "type": "string" 1776 | }, 1777 | "type": "groupBy" 1778 | } 1779 | ], 1780 | "limit": 50 1781 | } 1782 | } 1783 | ], 1784 | "title": "Percent Total of Individual Users and Errors", 1785 | "transformations": [ 1786 | { 1787 | "id": "groupingToMatrix", 1788 | "options": { 1789 | "columnField": "username", 1790 | "rowField": "report_date", 1791 | "valueField": "perc_total_errors" 1792 | } 1793 | }, 1794 | { 1795 | "id": "convertFieldType", 1796 | "options": { 1797 | "conversions": [ 1798 | { 1799 | "destinationType": "time", 1800 | "targetField": "report_date\\username" 1801 | } 1802 | ], 1803 | "fields": {} 1804 | } 1805 | } 1806 | ], 1807 | "type": "timeseries" 1808 | }, 1809 | { 1810 | "datasource": { 1811 | "type": "grafana-bigquery-datasource", 1812 | "uid": "${DS_BIGQUERY}" 1813 | }, 1814 | "description": "Shows how many users have queried each of the layers for a given day. Users can query more than one layer.", 1815 | "fieldConfig": { 1816 | "defaults": { 1817 | "color": { 1818 | "mode": "palette-classic" 1819 | }, 1820 | "custom": { 1821 | "axisBorderShow": false, 1822 | "axisCenteredZero": false, 1823 | "axisColorMode": "text", 1824 | "axisLabel": "", 1825 | "axisPlacement": "auto", 1826 | "axisSoftMax": 1, 1827 | "axisSoftMin": 0, 1828 | "barAlignment": 0, 1829 | "drawStyle": "line", 1830 | "fillOpacity": 0, 1831 | "gradientMode": "none", 1832 | "hideFrom": { 1833 | "legend": false, 1834 | "tooltip": false, 1835 | "viz": false 1836 | }, 1837 | "insertNulls": false, 1838 | "lineInterpolation": "linear", 1839 | "lineStyle": { 1840 | "fill": "solid" 1841 | }, 1842 | "lineWidth": 1, 1843 | "pointSize": 5, 1844 | "scaleDistribution": { 1845 | "type": "linear" 1846 | }, 1847 | "showPoints": "never", 1848 | "spanNulls": true, 1849 | "stacking": { 1850 | "group": "A", 1851 | "mode": "none" 1852 | }, 1853 | "thresholdsStyle": { 1854 | "mode": "off" 1855 | } 1856 | }, 1857 | "mappings": [], 1858 | "thresholds": { 1859 | "mode": "absolute", 1860 | "steps": [ 1861 | { 1862 | "color": "green" 1863 | }, 1864 | { 1865 | "color": "red", 1866 | "value": 80 1867 | } 1868 | ] 1869 | }, 1870 | "unit": "none", 1871 | "unitScale": true 1872 | }, 1873 | "overrides": [] 1874 | }, 1875 | "gridPos": { 1876 | "h": 10, 1877 | "w": 24, 1878 | "x": 0, 1879 | "y": 99 1880 | }, 1881 | "id": 8, 1882 | "options": { 1883 | "legend": { 1884 | "calcs": [], 1885 | "displayMode": "table", 1886 | "placement": "right", 1887 | "showLegend": true 1888 | }, 1889 | "tooltip": { 1890 | "mode": "single", 1891 | "sort": "none" 1892 | } 1893 | }, 1894 | "pluginVersion": "9.4.4-2911eec", 1895 | "targets": [ 1896 | { 1897 | "datasource": { 1898 | "type": "grafana-bigquery-datasource", 1899 | "uid": "${DS_BIGQUERY}" 1900 | }, 1901 | "editorMode": "code", 1902 | "format": 1, 1903 | "location": "US", 1904 | "project": "", 1905 | "rawQuery": true, 1906 | "rawSql": "\n select\n timestamp(report_date) as report_date\n -- , count(distinct if(total_tables_used > 0, username, null)) as users\n , count(if(total_prod_tables_used > 0, 1, null)) as used_prod\n , count(if(total_stage_tables_used > 0, 1, null)) as used_stage\n , count(if(total_raw_tables_used > 0, 1, null)) as used_raw\n from `leaner_query.rpt_bigquery_user_metrics_daily`\n where username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n group by 1\n\n", 1907 | "refId": "A", 1908 | "sql": { 1909 | "columns": [ 1910 | { 1911 | "parameters": [], 1912 | "type": "function" 1913 | } 1914 | ], 1915 | "groupBy": [ 1916 | { 1917 | "property": { 1918 | "type": "string" 1919 | }, 1920 | "type": "groupBy" 1921 | } 1922 | ], 1923 | "limit": 50 1924 | } 1925 | } 1926 | ], 1927 | "title": "Distinct Users by Layer", 1928 | "transformations": [ 1929 | { 1930 | "id": "groupingToMatrix", 1931 | "options": { 1932 | "columnField": "username", 1933 | "rowField": "report_date", 1934 | "valueField": "sum_cost" 1935 | } 1936 | }, 1937 | { 1938 | "id": "convertFieldType", 1939 | "options": { 1940 | "conversions": [ 1941 | { 1942 | "destinationType": "time", 1943 | "targetField": "report_date\\username" 1944 | } 1945 | ], 1946 | "fields": {} 1947 | } 1948 | }, 1949 | { 1950 | "id": "organize", 1951 | "options": { 1952 | "excludeByName": {}, 1953 | "includeByName": {}, 1954 | "indexByName": {}, 1955 | "renameByName": { 1956 | "report_date": "Report Date", 1957 | "used_prod": "Prod", 1958 | "used_raw": "Raw", 1959 | "used_stage": "Stage" 1960 | } 1961 | } 1962 | } 1963 | ], 1964 | "type": "timeseries" 1965 | }, 1966 | { 1967 | "datasource": { 1968 | "type": "grafana-bigquery-datasource", 1969 | "uid": "${DS_BIGQUERY}" 1970 | }, 1971 | "description": "Shows what percentage (of the total use) each data layer was used in queries.", 1972 | "fieldConfig": { 1973 | "defaults": { 1974 | "color": { 1975 | "mode": "palette-classic" 1976 | }, 1977 | "custom": { 1978 | "axisBorderShow": false, 1979 | "axisCenteredZero": false, 1980 | "axisColorMode": "text", 1981 | "axisLabel": "", 1982 | "axisPlacement": "auto", 1983 | "axisSoftMax": 1, 1984 | "axisSoftMin": 0, 1985 | "barAlignment": 0, 1986 | "drawStyle": "line", 1987 | "fillOpacity": 0, 1988 | "gradientMode": "none", 1989 | "hideFrom": { 1990 | "legend": false, 1991 | "tooltip": false, 1992 | "viz": false 1993 | }, 1994 | "insertNulls": false, 1995 | "lineInterpolation": "linear", 1996 | "lineStyle": { 1997 | "fill": "solid" 1998 | }, 1999 | "lineWidth": 1, 2000 | "pointSize": 5, 2001 | "scaleDistribution": { 2002 | "type": "linear" 2003 | }, 2004 | "showPoints": "never", 2005 | "spanNulls": true, 2006 | "stacking": { 2007 | "group": "A", 2008 | "mode": "none" 2009 | }, 2010 | "thresholdsStyle": { 2011 | "mode": "off" 2012 | } 2013 | }, 2014 | "mappings": [], 2015 | "thresholds": { 2016 | "mode": "absolute", 2017 | "steps": [ 2018 | { 2019 | "color": "green" 2020 | }, 2021 | { 2022 | "color": "red", 2023 | "value": 80 2024 | } 2025 | ] 2026 | }, 2027 | "unit": "percentunit", 2028 | "unitScale": true 2029 | }, 2030 | "overrides": [] 2031 | }, 2032 | "gridPos": { 2033 | "h": 10, 2034 | "w": 24, 2035 | "x": 0, 2036 | "y": 109 2037 | }, 2038 | "id": 20, 2039 | "options": { 2040 | "legend": { 2041 | "calcs": [], 2042 | "displayMode": "table", 2043 | "placement": "right", 2044 | "showLegend": true 2045 | }, 2046 | "tooltip": { 2047 | "mode": "single", 2048 | "sort": "none" 2049 | } 2050 | }, 2051 | "pluginVersion": "9.4.4-2911eec", 2052 | "targets": [ 2053 | { 2054 | "datasource": { 2055 | "type": "grafana-bigquery-datasource", 2056 | "uid": "${DS_BIGQUERY}" 2057 | }, 2058 | "editorMode": "code", 2059 | "format": 1, 2060 | "location": "US", 2061 | "project": "", 2062 | "rawQuery": true, 2063 | "rawSql": "with daily_totals as(\nSELECT \n report_date,\n sum(total_prod_tables_used) as total_prod_tables_used,\n sum(total_stage_tables_used) as total_stage_tables_used,\n sum(total_raw_tables_used) as total_raw_tables_used,\n FROM `leaner_query.rpt_bigquery_user_metrics_daily` \n where username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n group by 1\n ),\nadd_total_use as(\n select *,\n daily_totals.total_prod_tables_used + daily_totals.total_stage_tables_used + daily_totals.total_raw_tables_used as total_tables_used\n from daily_totals\n)\nselect \n timestamp(report_date) as report_date,\n add_total_use.total_prod_tables_used / add_total_use.total_tables_used as `Percent Prod Used`,\n add_total_use.total_stage_tables_used / add_total_use.total_tables_used as `Percent Stage Used`,\n add_total_use.total_raw_tables_used / add_total_use.total_tables_used as `Percent Raw Used`,\nfrom add_total_use;", 2064 | "refId": "A", 2065 | "sql": { 2066 | "columns": [ 2067 | { 2068 | "parameters": [], 2069 | "type": "function" 2070 | } 2071 | ], 2072 | "groupBy": [ 2073 | { 2074 | "property": { 2075 | "type": "string" 2076 | }, 2077 | "type": "groupBy" 2078 | } 2079 | ], 2080 | "limit": 50 2081 | } 2082 | } 2083 | ], 2084 | "title": "Percent layer use", 2085 | "transformations": [ 2086 | { 2087 | "id": "groupingToMatrix", 2088 | "options": { 2089 | "columnField": "username", 2090 | "rowField": "report_date", 2091 | "valueField": "sum_cost" 2092 | } 2093 | }, 2094 | { 2095 | "id": "convertFieldType", 2096 | "options": { 2097 | "conversions": [ 2098 | { 2099 | "destinationType": "time", 2100 | "targetField": "report_date\\username" 2101 | } 2102 | ], 2103 | "fields": {} 2104 | } 2105 | }, 2106 | { 2107 | "id": "organize", 2108 | "options": { 2109 | "excludeByName": {}, 2110 | "includeByName": {}, 2111 | "indexByName": {}, 2112 | "renameByName": { 2113 | "Percent Prod Used": "Percent Prod", 2114 | "Percent Raw Used": "Percent Raw", 2115 | "Percent Stage Used": "Percent Stage" 2116 | } 2117 | } 2118 | } 2119 | ], 2120 | "type": "timeseries" 2121 | }, 2122 | { 2123 | "datasource": { 2124 | "type": "grafana-bigquery-datasource", 2125 | "uid": "${DS_BIGQUERY}" 2126 | }, 2127 | "description": "Shows the total number of times each data layer was used in queries.", 2128 | "fieldConfig": { 2129 | "defaults": { 2130 | "color": { 2131 | "mode": "palette-classic" 2132 | }, 2133 | "custom": { 2134 | "axisBorderShow": false, 2135 | "axisCenteredZero": false, 2136 | "axisColorMode": "text", 2137 | "axisLabel": "", 2138 | "axisPlacement": "auto", 2139 | "fillOpacity": 80, 2140 | "gradientMode": "none", 2141 | "hideFrom": { 2142 | "legend": false, 2143 | "tooltip": false, 2144 | "viz": false 2145 | }, 2146 | "lineWidth": 1, 2147 | "scaleDistribution": { 2148 | "type": "linear" 2149 | }, 2150 | "thresholdsStyle": { 2151 | "mode": "off" 2152 | } 2153 | }, 2154 | "mappings": [], 2155 | "thresholds": { 2156 | "mode": "absolute", 2157 | "steps": [ 2158 | { 2159 | "color": "green" 2160 | }, 2161 | { 2162 | "color": "red", 2163 | "value": 80 2164 | } 2165 | ] 2166 | }, 2167 | "unit": "none", 2168 | "unitScale": true 2169 | }, 2170 | "overrides": [] 2171 | }, 2172 | "gridPos": { 2173 | "h": 10, 2174 | "w": 24, 2175 | "x": 0, 2176 | "y": 119 2177 | }, 2178 | "id": 21, 2179 | "options": { 2180 | "barRadius": 0, 2181 | "barWidth": 0.97, 2182 | "fullHighlight": false, 2183 | "groupWidth": 0.7, 2184 | "legend": { 2185 | "calcs": [], 2186 | "displayMode": "list", 2187 | "placement": "bottom", 2188 | "showLegend": true 2189 | }, 2190 | "orientation": "auto", 2191 | "showValue": "auto", 2192 | "stacking": "normal", 2193 | "tooltip": { 2194 | "mode": "single", 2195 | "sort": "none" 2196 | }, 2197 | "xTickLabelRotation": 45, 2198 | "xTickLabelSpacing": 100 2199 | }, 2200 | "pluginVersion": "9.4.4-2911eec", 2201 | "targets": [ 2202 | { 2203 | "datasource": { 2204 | "type": "grafana-bigquery-datasource", 2205 | "uid": "${DS_BIGQUERY}" 2206 | }, 2207 | "editorMode": "code", 2208 | "format": 1, 2209 | "location": "US", 2210 | "project": "", 2211 | "rawQuery": true, 2212 | "rawSql": "SELECT \n timestamp(report_date),\n sum(total_prod_tables_used) as `Total prod tables used`,\n sum(total_stage_tables_used) as `Total stage tables used`,\n sum(total_raw_tables_used) as `Total rawtables used`,\n FROM `leaner_query.rpt_bigquery_user_metrics_daily` \n where username in ($usernames)\n and user_type in ($user_type)\n and $__timeFilter(timestamp(report_date)) \n group by 1", 2213 | "refId": "A", 2214 | "sql": { 2215 | "columns": [ 2216 | { 2217 | "parameters": [], 2218 | "type": "function" 2219 | } 2220 | ], 2221 | "groupBy": [ 2222 | { 2223 | "property": { 2224 | "type": "string" 2225 | }, 2226 | "type": "groupBy" 2227 | } 2228 | ], 2229 | "limit": 50 2230 | } 2231 | } 2232 | ], 2233 | "title": "Table usage by layer", 2234 | "transformations": [ 2235 | { 2236 | "id": "groupingToMatrix", 2237 | "options": { 2238 | "columnField": "username", 2239 | "rowField": "report_date", 2240 | "valueField": "sum_cost" 2241 | } 2242 | }, 2243 | { 2244 | "id": "convertFieldType", 2245 | "options": { 2246 | "conversions": [ 2247 | { 2248 | "destinationType": "time", 2249 | "targetField": "report_date\\username" 2250 | } 2251 | ], 2252 | "fields": {} 2253 | } 2254 | } 2255 | ], 2256 | "type": "barchart" 2257 | }, 2258 | { 2259 | "datasource": { 2260 | "type": "grafana-bigquery-datasource", 2261 | "uid": "${DS_BIGQUERY}" 2262 | }, 2263 | "description": "This table contains the top 25 tables in your BQ instance which have the highest threat levels.\n\nA table risk score is determined by how many external systems are connected via service accounts, the median cost to query, median cost to build, and the median daily errors incurred.", 2264 | "fieldConfig": { 2265 | "defaults": { 2266 | "color": { 2267 | "mode": "thresholds" 2268 | }, 2269 | "custom": { 2270 | "align": "auto", 2271 | "cellOptions": { 2272 | "type": "auto" 2273 | }, 2274 | "inspect": false 2275 | }, 2276 | "mappings": [], 2277 | "thresholds": { 2278 | "mode": "absolute", 2279 | "steps": [ 2280 | { 2281 | "color": "green" 2282 | }, 2283 | { 2284 | "color": "red", 2285 | "value": 80 2286 | } 2287 | ] 2288 | }, 2289 | "unitScale": true 2290 | }, 2291 | "overrides": [ 2292 | { 2293 | "matcher": { 2294 | "id": "byName", 2295 | "options": "table_name" 2296 | }, 2297 | "properties": [ 2298 | { 2299 | "id": "displayName", 2300 | "value": "Table Name" 2301 | } 2302 | ] 2303 | }, 2304 | { 2305 | "matcher": { 2306 | "id": "byName", 2307 | "options": "priority_score" 2308 | }, 2309 | "properties": [ 2310 | { 2311 | "id": "displayName", 2312 | "value": "Priority Score" 2313 | } 2314 | ] 2315 | } 2316 | ] 2317 | }, 2318 | "gridPos": { 2319 | "h": 8, 2320 | "w": 12, 2321 | "x": 0, 2322 | "y": 129 2323 | }, 2324 | "id": 6, 2325 | "options": { 2326 | "cellHeight": "sm", 2327 | "footer": { 2328 | "countRows": false, 2329 | "fields": "", 2330 | "reducer": [ 2331 | "sum" 2332 | ], 2333 | "show": false 2334 | }, 2335 | "showHeader": true 2336 | }, 2337 | "pluginVersion": "10.4.0-65610", 2338 | "targets": [ 2339 | { 2340 | "datasource": { 2341 | "type": "grafana-bigquery-datasource", 2342 | "uid": "${DS_BIGQUERY}" 2343 | }, 2344 | "editorMode": "code", 2345 | "format": 1, 2346 | "location": "US", 2347 | "project": "", 2348 | "rawQuery": true, 2349 | "rawSql": "select distinct\n concat(split(table_name, '/')[safe_offset(3)], '.', split(table_name, '/')[safe_offset(5)]) as table_name\n, threat_score\nfrom `leaner_query.rpt_bigquery_table_usage_daily`\nwhere report_date = current_date -1\n and threat_score is not null\norder by threat_score desc\nlimit 25 -- maybe a rank instead of this later on", 2350 | "refId": "A", 2351 | "sql": { 2352 | "columns": [ 2353 | { 2354 | "parameters": [], 2355 | "type": "function" 2356 | } 2357 | ], 2358 | "groupBy": [ 2359 | { 2360 | "property": { 2361 | "type": "string" 2362 | }, 2363 | "type": "groupBy" 2364 | } 2365 | ], 2366 | "limit": 50 2367 | } 2368 | } 2369 | ], 2370 | "title": "Top 25 Riskiest Tables", 2371 | "transformations": [ 2372 | { 2373 | "id": "organize", 2374 | "options": { 2375 | "excludeByName": {}, 2376 | "indexByName": {}, 2377 | "renameByName": { 2378 | "table_name": "Table Name", 2379 | "threat_score": "Threat Score" 2380 | } 2381 | } 2382 | } 2383 | ], 2384 | "type": "table" 2385 | }, 2386 | { 2387 | "datasource": { 2388 | "type": "grafana-bigquery-datasource", 2389 | "uid": "${DS_BIGQUERY}" 2390 | }, 2391 | "description": "This table contains the top 25 tables in your BQ instance which have the highest importance levels.\n\nThe importance calculation is a combination of Service Account usage, breadth of non-service account use, external system use, and the degree to which other data objects depend on this object.", 2392 | "fieldConfig": { 2393 | "defaults": { 2394 | "color": { 2395 | "mode": "thresholds" 2396 | }, 2397 | "custom": { 2398 | "align": "auto", 2399 | "cellOptions": { 2400 | "type": "auto" 2401 | }, 2402 | "inspect": false 2403 | }, 2404 | "mappings": [], 2405 | "thresholds": { 2406 | "mode": "absolute", 2407 | "steps": [ 2408 | { 2409 | "color": "green" 2410 | }, 2411 | { 2412 | "color": "red", 2413 | "value": 80 2414 | } 2415 | ] 2416 | }, 2417 | "unitScale": true 2418 | }, 2419 | "overrides": [ 2420 | { 2421 | "matcher": { 2422 | "id": "byName", 2423 | "options": "table_name" 2424 | }, 2425 | "properties": [ 2426 | { 2427 | "id": "displayName", 2428 | "value": "Table Name" 2429 | } 2430 | ] 2431 | }, 2432 | { 2433 | "matcher": { 2434 | "id": "byName", 2435 | "options": "priority_score" 2436 | }, 2437 | "properties": [ 2438 | { 2439 | "id": "displayName", 2440 | "value": "Priority Score" 2441 | } 2442 | ] 2443 | } 2444 | ] 2445 | }, 2446 | "gridPos": { 2447 | "h": 8, 2448 | "w": 12, 2449 | "x": 12, 2450 | "y": 129 2451 | }, 2452 | "id": 7, 2453 | "options": { 2454 | "cellHeight": "sm", 2455 | "footer": { 2456 | "countRows": false, 2457 | "fields": "", 2458 | "reducer": [ 2459 | "sum" 2460 | ], 2461 | "show": false 2462 | }, 2463 | "showHeader": true 2464 | }, 2465 | "pluginVersion": "10.4.0-65610", 2466 | "targets": [ 2467 | { 2468 | "datasource": { 2469 | "type": "grafana-bigquery-datasource", 2470 | "uid": "${DS_BIGQUERY}" 2471 | }, 2472 | "editorMode": "code", 2473 | "format": 1, 2474 | "location": "US", 2475 | "project": "", 2476 | "rawQuery": true, 2477 | "rawSql": "select distinct\n concat(split(table_name, '/')[safe_offset(3)], '.', split(table_name, '/')[safe_offset(5)]) as table_name\n, importance_score\nfrom `leaner_query.rpt_bigquery_table_usage_daily`\nwhere report_date = current_date -1\n and importance_score is not null\norder by importance_score desc\nlimit 25 -- maybe a rank instead of this later on", 2478 | "refId": "A", 2479 | "sql": { 2480 | "columns": [ 2481 | { 2482 | "parameters": [], 2483 | "type": "function" 2484 | } 2485 | ], 2486 | "groupBy": [ 2487 | { 2488 | "property": { 2489 | "type": "string" 2490 | }, 2491 | "type": "groupBy" 2492 | } 2493 | ], 2494 | "limit": 50 2495 | } 2496 | } 2497 | ], 2498 | "title": "Top 25 Important Tables", 2499 | "transformations": [ 2500 | { 2501 | "id": "organize", 2502 | "options": { 2503 | "excludeByName": {}, 2504 | "indexByName": {}, 2505 | "renameByName": { 2506 | "importance_score": "Importance Score" 2507 | } 2508 | } 2509 | } 2510 | ], 2511 | "type": "table" 2512 | }, 2513 | { 2514 | "datasource": { 2515 | "type": "grafana-bigquery-datasource", 2516 | "uid": "${DS_BIGQUERY}" 2517 | }, 2518 | "description": "This table contains the top 25 tables in your BQ instance which should have priority to be examined for optimization.", 2519 | "fieldConfig": { 2520 | "defaults": { 2521 | "color": { 2522 | "mode": "thresholds" 2523 | }, 2524 | "custom": { 2525 | "align": "auto", 2526 | "cellOptions": { 2527 | "type": "auto" 2528 | }, 2529 | "inspect": false 2530 | }, 2531 | "mappings": [], 2532 | "thresholds": { 2533 | "mode": "absolute", 2534 | "steps": [ 2535 | { 2536 | "color": "green" 2537 | }, 2538 | { 2539 | "color": "red", 2540 | "value": 80 2541 | } 2542 | ] 2543 | }, 2544 | "unitScale": true 2545 | }, 2546 | "overrides": [ 2547 | { 2548 | "matcher": { 2549 | "id": "byName", 2550 | "options": "table_name" 2551 | }, 2552 | "properties": [ 2553 | { 2554 | "id": "displayName", 2555 | "value": "Table Name" 2556 | } 2557 | ] 2558 | }, 2559 | { 2560 | "matcher": { 2561 | "id": "byName", 2562 | "options": "priority_score" 2563 | }, 2564 | "properties": [ 2565 | { 2566 | "id": "displayName", 2567 | "value": "Priority Score" 2568 | } 2569 | ] 2570 | } 2571 | ] 2572 | }, 2573 | "gridPos": { 2574 | "h": 8, 2575 | "w": 12, 2576 | "x": 0, 2577 | "y": 137 2578 | }, 2579 | "id": 5, 2580 | "options": { 2581 | "cellHeight": "sm", 2582 | "footer": { 2583 | "countRows": false, 2584 | "fields": "", 2585 | "reducer": [ 2586 | "sum" 2587 | ], 2588 | "show": false 2589 | }, 2590 | "showHeader": true 2591 | }, 2592 | "pluginVersion": "10.4.0-65610", 2593 | "targets": [ 2594 | { 2595 | "datasource": { 2596 | "type": "grafana-bigquery-datasource", 2597 | "uid": "${DS_BIGQUERY}" 2598 | }, 2599 | "editorMode": "code", 2600 | "format": 1, 2601 | "location": "US", 2602 | "project": "", 2603 | "rawQuery": true, 2604 | "rawSql": "select distinct\n concat(split(table_name, '/')[safe_offset(3)], '.', split(table_name, '/')[safe_offset(5)]) as table_name\n, priority_score\nfrom `leaner_query.rpt_bigquery_table_usage_daily`\nwhere report_date = current_date -1\n and priority_score is not null\norder by priority_score desc\nlimit 25 -- maybe a rank instead of this later on", 2605 | "refId": "A", 2606 | "sql": { 2607 | "columns": [ 2608 | { 2609 | "parameters": [], 2610 | "type": "function" 2611 | } 2612 | ], 2613 | "groupBy": [ 2614 | { 2615 | "property": { 2616 | "type": "string" 2617 | }, 2618 | "type": "groupBy" 2619 | } 2620 | ], 2621 | "limit": 50 2622 | } 2623 | } 2624 | ], 2625 | "title": "Top 25 Priority Tables", 2626 | "type": "table" 2627 | } 2628 | ], 2629 | "refresh": "", 2630 | "revision": 1, 2631 | "schemaVersion": 39, 2632 | "tags": [], 2633 | "templating": { 2634 | "list": [ 2635 | { 2636 | "current": {}, 2637 | "datasource": { 2638 | "type": "grafana-bigquery-datasource", 2639 | "uid": "${DS_BIGQUERY}" 2640 | }, 2641 | "definition": "", 2642 | "hide": 0, 2643 | "includeAll": true, 2644 | "label": "Usernames", 2645 | "multi": true, 2646 | "name": "usernames", 2647 | "options": [], 2648 | "query": { 2649 | "editorMode": "code", 2650 | "format": 1, 2651 | "location": "US", 2652 | "project": "", 2653 | "rawQuery": true, 2654 | "rawSql": "select distinct\n username\nfrom `leaner_query.rpt_bigquery_user_metrics_daily`\nwhere report_date > '1000-03-01'\norder by username asc", 2655 | "sql": { 2656 | "columns": [ 2657 | { 2658 | "parameters": [], 2659 | "type": "function" 2660 | } 2661 | ], 2662 | "groupBy": [ 2663 | { 2664 | "property": { 2665 | "type": "string" 2666 | }, 2667 | "type": "groupBy" 2668 | } 2669 | ], 2670 | "limit": 50 2671 | } 2672 | }, 2673 | "refresh": 1, 2674 | "regex": "", 2675 | "skipUrlSync": false, 2676 | "sort": 1, 2677 | "type": "query" 2678 | }, 2679 | { 2680 | "current": {}, 2681 | "datasource": { 2682 | "type": "grafana-bigquery-datasource", 2683 | "uid": "${DS_BIGQUERY}" 2684 | }, 2685 | "definition": "", 2686 | "hide": 0, 2687 | "includeAll": true, 2688 | "label": "User Type", 2689 | "multi": true, 2690 | "name": "user_type", 2691 | "options": [], 2692 | "query": { 2693 | "editorMode": "code", 2694 | "format": 1, 2695 | "location": "US", 2696 | "project": "", 2697 | "rawQuery": true, 2698 | "rawSql": "select distinct\n user_type\nfrom `leaner_query.rpt_bigquery_user_metrics_daily`\nwhere report_date > '1000-03-01'\norder by 1", 2699 | "sql": { 2700 | "columns": [ 2701 | { 2702 | "parameters": [], 2703 | "type": "function" 2704 | } 2705 | ], 2706 | "groupBy": [ 2707 | { 2708 | "property": { 2709 | "type": "string" 2710 | }, 2711 | "type": "groupBy" 2712 | } 2713 | ], 2714 | "limit": 50 2715 | } 2716 | }, 2717 | "refresh": 1, 2718 | "regex": "", 2719 | "skipUrlSync": false, 2720 | "sort": 1, 2721 | "type": "query" 2722 | }, 2723 | { 2724 | "current": { 2725 | "selected": true, 2726 | "text": "biguery", 2727 | "value": "BWkWsc1nk" 2728 | }, 2729 | "hide": 0, 2730 | "includeAll": false, 2731 | "label": "BigQuery Datasource", 2732 | "multi": false, 2733 | "name": "bigquery_datasource", 2734 | "options": [], 2735 | "query": "grafana-bigquery-datasource", 2736 | "queryValue": "", 2737 | "refresh": 1, 2738 | "regex": "", 2739 | "skipUrlSync": false, 2740 | "type": "datasource" 2741 | } 2742 | ] 2743 | }, 2744 | "time": { 2745 | "from": "now-1y", 2746 | "to": "now - 1d" 2747 | }, 2748 | "timepicker": {}, 2749 | "timezone": "", 2750 | "title": "BigQuery & DBT metrics", 2751 | "uid": "", 2752 | "version": 92, 2753 | "weekStart": "" 2754 | } -------------------------------------------------------------------------------- /macros/build_client_type.sql: -------------------------------------------------------------------------------- 1 | {% macro build_client_type(custom_client) %} 2 | when 3 | {% set and_criteria = false -%} 4 | {% if custom_client["user_agent"] is not none -%} 5 | caller_supplied_user_agent like '{{ custom_client["user_agent"] }}%' 6 | {% set and_criteria = true -%} 7 | {% endif -%} 8 | {% if custom_client["principal_email"] is not none -%} 9 | {% if and_criteria -%} 10 | and 11 | {% endif -%} 12 | principal_email like '%{{custom_client["principal_email"]}}%' 13 | {% endif -%} 14 | then '{{custom_client["client_name"]}}' 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /macros/build_egress_emails.sql: -------------------------------------------------------------------------------- 1 | {% macro build_custom_egress_email_list() %} 2 | ({% for i in var('leaner_query_custom_egress_emails') %} 3 | '{{ i }}' 4 | {% if not loop.last %},{% endif %} 5 | {% endfor %}) 6 | {% endmacro %} 7 | 8 | -------------------------------------------------------------------------------- /macros/calc_bq_cost.sql: -------------------------------------------------------------------------------- 1 | {% macro calc_bq_on_demand_costs(total_billed_bytes) %} 2 | ( safe_divide({{ total_billed_bytes }}, pow(2,40)) * {{ var('leaner_query_bq_on_demand_pricing') }} ) 3 | {% endmacro %} 4 | 5 | {% macro calc_bq_slot_based_costs(pricing_package, slot_time_ms) %} 6 | {# slot pricing #} 7 | {% set price_per_ms = var('leaner_query_bq_slot_pricing') %} 8 | (total_slot_ms) / ((1000*60*60) * {{ price_per_ms[pricing_package] }}) 9 | {% endmacro %} 10 | 11 | {% macro calc_bq_cost(total_billed_bytes, slot_time_ms) %} 12 | {% if var('leaner_query_bq_pricing_schedule') == 'on_demand' %} 13 | {{ calc_bq_on_demand_costs(total_billed_bytes) }} 14 | {% else %} 15 | {{ calc_bq_slot_based_costs(var('leaner_query_bq_pricing_schedule'), slot_time_ms) }} 16 | {% endif %} 17 | {% endmacro %} 18 | -------------------------------------------------------------------------------- /macros/determine_data_layer.sql: -------------------------------------------------------------------------------- 1 | {% macro determine_data_layer(table_name) %} 2 | case 3 | {% for stage_name in var('leaner_query_stage_dataset_names') %} 4 | when {{ table_name }} = '{{stage_name}}' then 'stage' 5 | {% endfor -%} 6 | {% for prod_name in var('leaner_query_prod_dataset_names') %} 7 | when {{ table_name }} = '{{prod_name}}' then 'prod' 8 | {% endfor %} 9 | when {{ table_name }} is null then 'None' 10 | else 'raw' 11 | end 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/generate_surrogate_key.sql: -------------------------------------------------------------------------------- 1 | {# Extracted majority of logic from dbt_utils.generate_surrogate_key macro 2 | into this macro to avoid collisions and dpendencies. 3 | #} 4 | 5 | {%- macro generate_surrogate_key(field_list) -%} 6 | {% set default_null_value = '_leaner_query_surrogate_key_null_'%} 7 | 8 | {%- set fields = [] -%} 9 | 10 | {%- for field in field_list -%} 11 | 12 | {%- do fields.append( 13 | "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')" 14 | ) -%} 15 | 16 | {%- if not loop.last %} 17 | {%- do fields.append("'-'") -%} 18 | {%- endif -%} 19 | 20 | {%- endfor -%} 21 | 22 | {{ dbt.hash(dbt.concat(fields)) }} 23 | 24 | {%- endmacro -%} 25 | -------------------------------------------------------------------------------- /macros/test_unique_combination_of_columns.sql: -------------------------------------------------------------------------------- 1 | {% test unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %} 2 | {{ return(default__test_unique_combination_of_columns(model, combination_of_columns, quote_columns)) }} 3 | {% endtest %} 4 | 5 | {% macro default__test_unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %} 6 | 7 | {% if not quote_columns %} 8 | {%- set column_list=combination_of_columns %} 9 | {% elif quote_columns %} 10 | {%- set column_list=[] %} 11 | {% for column in combination_of_columns -%} 12 | {% set column_list = column_list.append( adapter.quote(column) ) %} 13 | {%- endfor %} 14 | {% else %} 15 | {{ exceptions.raise_compiler_error( 16 | "`quote_columns` argument for unique_combination_of_columns test must be one of [True, False] Got: '" ~ quote ~"'.'" 17 | ) }} 18 | {% endif %} 19 | 20 | {%- set columns_csv=column_list | join(', ') %} 21 | 22 | 23 | with validation_errors as ( 24 | 25 | select 26 | {{ columns_csv }} 27 | from {{ model }} 28 | group by {{ columns_csv }} 29 | having count(*) > 1 30 | 31 | ) 32 | 33 | select * 34 | from validation_errors 35 | 36 | 37 | {% endmacro %} -------------------------------------------------------------------------------- /models/intermediate/int_bigquery_audit_log.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: int_threat_calculation 6 | description: "" 7 | 8 | - name: int_importance_calculations 9 | description: "" 10 | 11 | - name: int_table_active_users 12 | description: "" 13 | -------------------------------------------------------------------------------- /models/intermediate/int_importance_calculations.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | cluster_by = ['table_name'], 11 | partition_by={ 12 | "field": "score_date", 13 | "data_type": "date", 14 | "granularity": "day" 15 | }, 16 | materialized = 'incremental', 17 | partitions = partitions_to_replace, 18 | incremental_strategy = 'insert_overwrite' 19 | ) }} 20 | 21 | with fct_executed_statements as ( 22 | select * 23 | from {{ ref('fct_executed_statements') }} 24 | where 1=1 25 | {% if is_incremental() %} 26 | and date(statement_date) >= current_date - 3 27 | {% endif %} 28 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 29 | and date(statement_date) >= current_date - {{ var('leaner_query_dev_limit_days') }} 30 | {% endif %} 31 | 32 | ), 33 | 34 | min_event_date as( 35 | select min(statement_date) as min_date 36 | from fct_executed_statements 37 | ), 38 | 39 | calendar as ( 40 | select date_day 41 | from {{ ref('dim_leaner_query_date') }} 42 | inner join min_event_date on date_day between min_date and current_date 43 | ), 44 | 45 | dim_table_view_references as( 46 | select * 47 | from {{ ref('dim_job_table_view_references') }} 48 | where object_type = 'table' 49 | ), 50 | 51 | dim_bq_users as( 52 | select * 53 | from {{ ref('dim_bigquery_users') }} 54 | ), 55 | 56 | dim_job_labels as( 57 | select * 58 | from {{ ref('dim_job_labels') }} 59 | ), 60 | 61 | dim_user_agents as( 62 | select * 63 | from {{ ref('dim_user_agents') }} 64 | ), 65 | 66 | int_table_active_users as( 67 | select * 68 | from {{ ref('int_table_active_users') }} 69 | ), 70 | 71 | base as ( 72 | 73 | select 74 | statement_date, 75 | fct_executed_statements.job_key, 76 | dim_table_view_references.referenced_view_or_table, 77 | dim_table_view_references.layer_used, 78 | dim_bq_users.user_key, 79 | dim_bq_users.principal_email, 80 | dim_bq_users.user_type, 81 | fct_executed_statements.user_agent_key, 82 | fct_executed_statements.total_billed_bytes, 83 | fct_executed_statements.total_slot_ms 84 | from fct_executed_statements 85 | inner join dim_table_view_references 86 | on fct_executed_statements.job_key = dim_table_view_references.job_key 87 | inner join dim_bq_users 88 | on fct_executed_statements.user_key = dim_bq_users.user_key 89 | ), 90 | 91 | deduped_base as( 92 | select 93 | referenced_view_or_table, 94 | statement_date 95 | from base 96 | qualify row_number() over(partition by statement_date, referenced_view_or_table) = 1 97 | ), 98 | 99 | daily_service_account_queries as( 100 | select 101 | base.referenced_view_or_table as ref_table, 102 | calendar.date_day, 103 | coalesce(count(base.job_key), 0) as query_count, 104 | from base 105 | left outer join calendar on calendar.date_day = base.statement_date and date(base.statement_date) > calendar.date_day - 7 106 | where base.user_type = 'Service Account' 107 | group by 1,2 108 | ), 109 | 110 | daily_dbt_queries as( 111 | select 112 | base.referenced_view_or_table as ref_table, 113 | calendar.date_day, 114 | coalesce(count(dim_job_labels.label_value),0) as query_count, 115 | from base 116 | left outer join calendar on calendar.date_day = base.statement_date and date(base.statement_date) > calendar.date_day - 7 117 | left outer join dim_job_labels as dim_job_labels on base.job_key = dim_job_labels.job_key and dim_job_labels.label_key = 'dbt_invocation_id' 118 | where base.user_type = 'Service Account' 119 | group by 1,2 120 | ), 121 | 122 | daily_egress_use as( 123 | select 124 | base.referenced_view_or_table as ref_table, 125 | calendar.date_day, 126 | sum( 127 | case 128 | when dim_user_agents.client_type in ({{ "\'" + var('leaner_query_importance_query_score_1')|join("\', \'") + "\'"}}) then 1 129 | when dim_user_agents.client_type in ({{ "\'" + var('leaner_query_importance_query_score_2')|join("\', \'") + "\'"}}) then 2 130 | when dim_user_agents.client_type in ({{ "\'" + var('leaner_query_importance_query_score_3')|join("\', \'") + "\'"}}) then 3 131 | when dim_user_agents.client_type in ({{ "\'" + var('leaner_query_importance_query_score_4')|join("\', \'") + "\'"}}) then 4 132 | else 0 133 | end 134 | ) as query_score 135 | from base 136 | left outer join calendar on calendar.date_day = base.statement_date and date(base.statement_date) > calendar.date_day - 7 137 | inner join dim_user_agents on base.user_agent_key = dim_user_agents.user_agent_key 138 | where base.user_type = 'Service Account' 139 | group by 1,2 140 | 141 | ), 142 | 143 | median_service_account_queries as( 144 | select 145 | *, 146 | percentile_cont(query_count, 0.5 ignore nulls) over(partition by ref_table) as median_query_count 147 | from daily_service_account_queries 148 | ), 149 | 150 | median_dbt_queries as( 151 | select 152 | *, 153 | percentile_cont(query_count, 0.5 ignore nulls) over(partition by ref_table) as median_query_count 154 | from daily_dbt_queries 155 | ), 156 | 157 | median_egress_use as( 158 | select 159 | *, 160 | percentile_cont(query_score, 0.5 ignore nulls) over(partition by ref_table) as median_query_count 161 | from daily_egress_use 162 | ), 163 | 164 | median_active_users as( 165 | select 166 | *, 167 | percentile_cont(human_monthly_active_users_30_day_cnt, 0.5 ignore nulls) over(partition by table_name) as median_active_user_count 168 | from int_table_active_users 169 | ), 170 | 171 | percent_rank_service_account_queries as( 172 | select 173 | ref_table, 174 | date_day, 175 | median_query_count, 176 | percent_rank() over (order by median_query_count asc) as perc_rnk 177 | from median_service_account_queries 178 | group by 1,2,3 179 | ), 180 | 181 | percent_rank_dbt_queries as( 182 | select 183 | ref_table, 184 | date_day, 185 | median_query_count, 186 | percent_rank() over (order by median_query_count asc) as perc_rnk 187 | from median_dbt_queries 188 | group by 1,2,3 189 | ), 190 | 191 | percent_rank_egress_use as( 192 | select 193 | ref_table, 194 | date_day, 195 | median_query_count, 196 | percent_rank() over (order by median_query_count asc) as perc_rnk 197 | from median_egress_use 198 | group by 1,2,3 199 | ), 200 | 201 | percent_rank_active_users as( 202 | select 203 | table_name as ref_table, 204 | event_date as date_day, 205 | median_active_user_count, 206 | percent_rank() over (order by median_active_user_count asc) as perc_rnk 207 | from median_active_users 208 | group by 1,2,3 209 | ), 210 | 211 | final as( 212 | select 213 | deduped_base.referenced_view_or_table as table_name, 214 | deduped_base.statement_date as score_date, 215 | percent_rank_service_account_queries.median_query_count as service_acct_median_queries, 216 | percent_rank_service_account_queries.perc_rnk as service_acct_percent_rank, 217 | percent_rank_dbt_queries.median_query_count as dbt_median_queries, 218 | percent_rank_dbt_queries.perc_rnk as dbt_percent_rank, 219 | percent_rank_egress_use.median_query_count as egress_median_queries, 220 | percent_rank_egress_use.perc_rnk as egress_percent_rank, 221 | percent_rank_active_users.median_active_user_count as active_user_median, 222 | percent_rank_active_users.perc_rnk as active_user_percent_rank, 223 | ((percent_rank_service_account_queries.perc_rnk * {{ var('leaner_query_weight_importance__service_account_queries') }}) + 224 | (percent_rank_dbt_queries.perc_rnk * {{ var('leaner_query_weight_importance__dbt_queries') }}) + 225 | (percent_rank_egress_use.perc_rnk * {{ var('leaner_query_weight_importance__egress_use') }}) + 226 | (percent_rank_active_users.perc_rnk * {{ var('leaner_query_weight_importance__user_breadth') }})) as importance_score 227 | from deduped_base 228 | left outer join percent_rank_service_account_queries on deduped_base.referenced_view_or_table = percent_rank_service_account_queries.ref_table 229 | and deduped_base.statement_date = percent_rank_service_account_queries.date_day 230 | left outer join percent_rank_dbt_queries on deduped_base.referenced_view_or_table = percent_rank_dbt_queries.ref_table 231 | and deduped_base.statement_date = percent_rank_dbt_queries.date_day 232 | left outer join percent_rank_egress_use on deduped_base.referenced_view_or_table = percent_rank_egress_use.ref_table 233 | and deduped_base.statement_date = percent_rank_egress_use.date_day 234 | left outer join percent_rank_active_users on deduped_base.referenced_view_or_table = percent_rank_active_users.ref_table 235 | and deduped_base.statement_date = percent_rank_active_users.date_day 236 | 237 | ) 238 | 239 | select * 240 | from final 241 | -------------------------------------------------------------------------------- /models/intermediate/int_table_active_users.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | cluster_by = ['table_name'], 11 | partition_by={ 12 | "field": "event_date", 13 | "data_type": "date", 14 | "granularity": "day" 15 | }, 16 | materialized = 'incremental', 17 | partitions = partitions_to_replace, 18 | incremental_strategy = 'insert_overwrite' 19 | ) }} 20 | 21 | with source as( 22 | select * 23 | from {{ ref('stg_bigquery_audit_log__data_access') }} 24 | where principal_email not like '%.iam.gserviceaccount.com' 25 | {% if is_incremental() %} 26 | and date(event_timestamp) >= current_date - 3 27 | {% endif %} 28 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 29 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 30 | {% endif %} 31 | 32 | ), 33 | 34 | tables as ( 35 | select distinct 36 | job_id, 37 | referenced_table as table_name, 38 | from source 39 | cross join unnest(referenced_tables) as referenced_table 40 | ), 41 | 42 | base_joined as ( 43 | select distinct 44 | date(event_timestamp) as event_date, 45 | table_name, 46 | principal_email 47 | from source 48 | left outer join tables 49 | on source.job_id = tables.job_id 50 | ), 51 | 52 | first_date as ( 53 | select min(date(event_timestamp)) as min_date 54 | from source 55 | ), 56 | 57 | users as ( 58 | select distinct principal_email, 59 | from source 60 | ), 61 | 62 | dim_date as ( 63 | select date_day as event_date 64 | from {{ ref('dim_leaner_query_date') }} as dim_date 65 | cross join first_date 66 | where date_day between min_date and current_date 67 | ), 68 | 69 | single_tables as ( 70 | select distinct table_name, 71 | from tables 72 | ), 73 | 74 | base_tables as ( 75 | select distinct 76 | event_date, 77 | table_name, 78 | principal_email 79 | from dim_date 80 | cross join single_tables 81 | cross join users 82 | ), 83 | 84 | active_users as ( 85 | select 86 | bt.*, 87 | count(if(bj.principal_email is not null, bt.principal_email, null)) 88 | over (partition by bt.table_name, bt.principal_email 89 | order by bt.event_date rows between 29 preceding and current row) 90 | as num_days_active 91 | from base_tables as bt 92 | left outer join base_joined as bj 93 | on bt.event_date = bj.event_date 94 | and bt.table_name = bj.table_name 95 | and bt.principal_email = bj.principal_email 96 | ), 97 | 98 | user_frequency as ( 99 | select 100 | event_date, 101 | principal_email, 102 | table_name, 103 | case 104 | when num_days_active >= 5 then "weekly" 105 | when num_days_active > 0 then "monthly" 106 | else "other" 107 | end as user_activity_status 108 | from active_users 109 | ), 110 | 111 | final as ( 112 | select 113 | event_date, 114 | table_name, 115 | sum(count(distinct if(user_activity_status = "weekly", principal_email, null))) 116 | over (order by event_date rows between 29 preceding and current row) 117 | as human_weekly_active_users_30_day_cnt, 118 | sum(count(distinct if(user_activity_status = "monthly", principal_email, null))) 119 | over (order by event_date rows between 29 preceding and current row) 120 | as human_monthly_active_users_30_day_cnt 121 | from user_frequency 122 | group by 1,2 123 | ) 124 | 125 | select * 126 | from final 127 | -------------------------------------------------------------------------------- /models/intermediate/int_threat_calculation.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | cluster_by = ['referenced_view_or_table'], 11 | partition_by={ 12 | "field": "statement_date", 13 | "data_type": "date", 14 | "granularity": "day" 15 | }, 16 | materialized = 'incremental', 17 | partitions = partitions_to_replace, 18 | incremental_strategy = 'insert_overwrite' 19 | ) }} 20 | 21 | with statements as ( 22 | 23 | select * 24 | from {{ ref('fct_executed_statements') }} 25 | where 1=1 26 | {% if is_incremental() %} 27 | and date(statement_date) >= current_date - 3 28 | {% endif %} 29 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 30 | and date(statement_date) >= current_date - {{ var('leaner_query_dev_limit_days') }} 31 | {% endif %} 32 | 33 | ), 34 | 35 | jobs as ( 36 | 37 | select * 38 | from {{ ref('dim_job') }} 39 | 40 | ), 41 | 42 | table_refs as ( 43 | 44 | select * 45 | from {{ ref('dim_job_table_view_references') }} 46 | where object_type = 'table' 47 | 48 | ), 49 | 50 | users as ( 51 | 52 | select * 53 | from {{ ref('dim_bigquery_users') }} 54 | 55 | ), 56 | 57 | base as ( 58 | 59 | select 60 | statement_date, 61 | table_refs.referenced_view_or_table, 62 | table_refs.layer_used, 63 | users.principal_email, 64 | users.user_type, 65 | statements.total_billed_bytes, 66 | statements.total_slot_ms, 67 | if(statements.error_message_key = {{ generate_surrogate_key(["'NONE'", "'NONE'"]) }}, false, true) as is_errored, 68 | jobs.dbt_execution_type 69 | from statements 70 | inner join table_refs 71 | on statements.job_key = table_refs.job_key 72 | left outer join jobs 73 | on statements.job_key = jobs.job_key 74 | inner join users 75 | on statements.user_key = users.user_key 76 | ), 77 | 78 | deduped_base as( 79 | select 80 | referenced_view_or_table, 81 | statement_date 82 | from base 83 | qualify row_number() over(partition by statement_date, referenced_view_or_table) = 1 84 | ), 85 | 86 | min_event_date as ( 87 | 88 | select min(statement_date) as min_date 89 | from base 90 | 91 | ), 92 | 93 | calendar as ( 94 | select date_day 95 | from {{ ref('dim_leaner_query_date') }} 96 | inner join min_event_date 97 | on date_day between min_date and current_date 98 | 99 | ), 100 | 101 | egress_use_counts as ( 102 | 103 | select 104 | referenced_view_or_table, 105 | calendar.date_day, 106 | layer_used, 107 | coalesce(if(layer_used != 'prod', count(principal_email) * 5, count(principal_email)),0) as count_queries_with_multiplier 108 | from base 109 | left outer join calendar 110 | on calendar.date_day = base.statement_date 111 | and date(base.statement_date) > calendar.date_day - 7 112 | where user_type = 'Service Account' 113 | {% if var('leaner_query_custom_egress_emails')| length > 0 -%} 114 | and principal_email in {{ build_custom_egress_email_list() }} 115 | {%- endif -%} 116 | group by 1,2,3 117 | 118 | ), 119 | 120 | percent_rank_egress_use as ( 121 | 122 | select 123 | referenced_view_or_table, 124 | count_queries_with_multiplier, 125 | date_day, 126 | percent_rank() over (order by count_queries_with_multiplier asc) as percent_rank_count_queries_with_multiplier 127 | from egress_use_counts 128 | 129 | ), 130 | 131 | cost_to_query as ( 132 | 133 | select 134 | base.referenced_view_or_table, 135 | calendar.date_day, 136 | coalesce(sum({{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }}),0) as sum_daily_query_cost, 137 | 138 | from base 139 | left outer join calendar 140 | on calendar.date_day = base.statement_date 141 | and date(base.statement_date) > calendar.date_day - 7 142 | where user_type = 'User' 143 | group by 1,2 144 | order by 1,2 145 | 146 | ), 147 | 148 | median_cost_to_query as ( 149 | 150 | select distinct 151 | referenced_view_or_table, 152 | date_day, 153 | sum_daily_query_cost, 154 | percentile_cont(sum_daily_query_cost, 0.5 ignore nulls) over (partition by referenced_view_or_table) as sum_daily_query_cost_median 155 | from cost_to_query 156 | 157 | ), 158 | 159 | percent_rank_cost_to_query as ( 160 | 161 | select 162 | referenced_view_or_table, 163 | date_day, 164 | sum_daily_query_cost_median, 165 | percent_rank() over (order by sum_daily_query_cost_median asc) as percent_rank_sum_daily_query_cost 166 | from median_cost_to_query 167 | 168 | ), 169 | 170 | cost_to_build as ( 171 | 172 | select 173 | base.referenced_view_or_table, 174 | calendar.date_day, 175 | coalesce(sum({{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }}),0) as sum_daily_build_cost 176 | from base 177 | left outer join calendar 178 | on calendar.date_day = base.statement_date 179 | and date(base.statement_date) > calendar.date_day - 7 180 | where user_type = 'Service Account' 181 | and dbt_execution_type is not null 182 | group by 1,2 183 | order by 1,2 184 | 185 | ), 186 | 187 | median_cost_to_build as ( 188 | 189 | select distinct 190 | referenced_view_or_table, 191 | date_day, 192 | sum_daily_build_cost, 193 | percentile_cont(sum_daily_build_cost, 0.5 ignore nulls) over (partition by referenced_view_or_table) as sum_daily_build_cost_median 194 | from cost_to_build 195 | 196 | ), 197 | 198 | percent_rank_cost_to_build as ( 199 | 200 | select 201 | referenced_view_or_table, 202 | date_day, 203 | sum_daily_build_cost_median, 204 | percent_rank() over (order by sum_daily_build_cost_median asc) as percent_rank_daily_build_cost 205 | from median_cost_to_build 206 | 207 | ), 208 | 209 | daily_errors as ( 210 | 211 | select 212 | base.referenced_view_or_table, 213 | calendar.date_day, 214 | coalesce(count(is_errored),0) as count_errors 215 | from base 216 | left outer join calendar 217 | on calendar.date_day = base.statement_date 218 | and date(base.statement_date) > calendar.date_day - 7 219 | where is_errored = true 220 | group by 1,2 221 | order by 1,2 222 | 223 | ), 224 | 225 | median_daily_errors as ( 226 | 227 | select distinct 228 | referenced_view_or_table, 229 | count_errors, 230 | date_day, 231 | percentile_cont(count_errors, 0.5 ignore nulls) over (partition by referenced_view_or_table) as median_count_daily_errors 232 | from daily_errors 233 | 234 | ), 235 | 236 | percent_rank_daily_errors as ( 237 | 238 | select 239 | referenced_view_or_table, 240 | median_count_daily_errors, 241 | date_day, 242 | percent_rank() over (order by median_count_daily_errors asc) as percent_rank_daily_errors 243 | from median_daily_errors 244 | 245 | ), 246 | 247 | joined as ( 248 | 249 | select 250 | deduped_base.referenced_view_or_table, 251 | deduped_base.statement_date, 252 | coalesce(egress.count_queries_with_multiplier, 0) as count_queries_with_multiplier, 253 | coalesce(egress.percent_rank_count_queries_with_multiplier, 0) as percent_rank_count_queries_with_multiplier, 254 | coalesce(cost_query.sum_daily_query_cost_median, 0) as sum_daily_query_cost_median, 255 | coalesce(cost_query.percent_rank_sum_daily_query_cost, 0) as percent_rank_sum_daily_query_cost, 256 | coalesce(cost_build.sum_daily_build_cost_median, 0) as sum_daily_build_cost_median, 257 | coalesce(cost_build.percent_rank_daily_build_cost, 0) as percent_rank_daily_build_cost, 258 | coalesce(errors.median_count_daily_errors, 0) as median_count_daily_errors, 259 | coalesce(errors.percent_rank_daily_errors, 0) as percent_rank_daily_errors, 260 | ((coalesce(egress.percent_rank_count_queries_with_multiplier, 0) * {{ var("leaner_query_weight_threat__service_account_egress") }}) 261 | + (coalesce(cost_query.percent_rank_sum_daily_query_cost, 0) * {{ var("leaner_query_weight_threat__cost_to_query") }}) 262 | + (coalesce(cost_build.percent_rank_daily_build_cost, 0) * {{ var("leaner_query_weight_threat__cost_to_build") }}) 263 | + (coalesce(percent_rank_daily_errors, 0) * {{ var("leaner_query_weight_threat__daily_errors") }})) as threat_score 264 | from deduped_base 265 | left outer join percent_rank_egress_use as egress 266 | on deduped_base.referenced_view_or_table = egress.referenced_view_or_table 267 | and deduped_base.statement_date = egress.date_day 268 | left outer join percent_rank_cost_to_query as cost_query 269 | on deduped_base.referenced_view_or_table = cost_query.referenced_view_or_table 270 | and deduped_base.statement_date = cost_query.date_day 271 | left outer join percent_rank_cost_to_build as cost_build 272 | on deduped_base.referenced_view_or_table = cost_build.referenced_view_or_table 273 | and deduped_base.statement_date = cost_build.date_day 274 | left outer join percent_rank_daily_errors as errors 275 | on deduped_base.referenced_view_or_table = errors.referenced_view_or_table 276 | and deduped_base.statement_date = errors.date_day 277 | 278 | ), 279 | 280 | final as ( 281 | 282 | select * 283 | from joined 284 | where threat_score > 0 285 | 286 | ) 287 | 288 | select * from final 289 | -------------------------------------------------------------------------------- /models/marts/bigquery_audit_log.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_job_table_view_references 5 | description: All of the unique tables and views that have been queried 6 | 7 | columns: 8 | - name: job_key 9 | description: Foreign key for dim_job - surrogate of job_id 10 | tests: 11 | - not_null 12 | 13 | - name: referenced_view_or_table 14 | description: The table or view being referenced 15 | 16 | - name: object_type 17 | description: Whether the object being referenced is a table or view 18 | 19 | - name: job_id 20 | description: Id of the job/query 21 | 22 | - name: layer_used 23 | description: Details which layer the table/view was being referenced from 24 | 25 | - name: dim_bigquery_users 26 | description: Tracks user_key to principal_email 27 | 28 | columns: 29 | - name: user_key 30 | description: Surrogate key off principal_email 31 | tests: 32 | - not_null 33 | - unique 34 | 35 | - name: principal_email 36 | description: User email initiating bigquery job 37 | 38 | - name: username 39 | description: String that comes before the "@" in the email 40 | 41 | - name: user_type 42 | description: Whether the user is a service account or a real user 43 | 44 | - name: project_id 45 | description: BQ project associated with the user ID. 46 | 47 | - name: dim_user_agents 48 | description: Houses user agents and clients that are connecting to bigquery 49 | 50 | columns: 51 | - name: user_agent_key 52 | description: Surrogate key - built from caller_supplied_user_agent 53 | tests: 54 | - not_null 55 | 56 | - name: caller_supplied_user_agent 57 | description: user agent supplied with the job 58 | 59 | - name: client_type 60 | description: Translated client type, determined from the user agent and pincipal email 61 | 62 | - name: dim_error_messages 63 | description: Houses error codes and messages 64 | 65 | columns: 66 | - name: error_message_key 67 | description: Surrogate key - built from error_result_code and error_result_message 68 | 69 | - name: error_result_code 70 | description: The Warehouse's code used to describe the error 71 | 72 | - name: error_result_message 73 | description: The Warehouse's error message 74 | 75 | - name: dim_job_labels 76 | description: Houses label keys, values, and job ids 77 | tests: 78 | - unique_combination_of_columns: 79 | combination_of_columns: 80 | - job_key 81 | - label_key 82 | 83 | columns: 84 | - name: job_key 85 | description: Foreign key for dim_job - surrogate of job_id 86 | tests: 87 | - not_null 88 | 89 | - name: label_key 90 | description: Key from the label key/value pair 91 | 92 | - name: label_value 93 | description: Value from the label key/value pair 94 | 95 | - name: dim_job 96 | description: > 97 | Houses information on job and query info as well as dbt execution info. 98 | columns: 99 | - name: job_key 100 | description: Foreign key for dim_job - surrogate of job_id 101 | tests: 102 | - not_null 103 | 104 | - name: dim_query_statements 105 | description: > 106 | Houses query statements at a job level. 107 | columns: 108 | - name: job_key 109 | description: Foreign key for dim_job - surrogate of job_id 110 | tests: 111 | - not_null 112 | 113 | - name: fct_executed_statements 114 | description: > 115 | Has keys for jobs, error messages, users, and user_agents, as well as 116 | some info around usage and billing. 117 | -------------------------------------------------------------------------------- /models/marts/dim_bigquery_users.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = 'user_key', 4 | cluster_by = ['user_type', 'user_key'], 5 | materialized = 'incremental', 6 | ) 7 | }} 8 | 9 | -- if this is too big, we may want to make this incremental 10 | with source as ( 11 | select * 12 | from {{ ref('stg_bigquery_audit_log__data_access') }} 13 | where 1=1 14 | {% if is_incremental() %} 15 | and date(event_timestamp) >= current_date - 3 16 | {% endif %} 17 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 18 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 19 | {% endif %} 20 | 21 | ), 22 | 23 | user_emails as ( 24 | select distinct 25 | principal_email, 26 | project_id 27 | from source 28 | ) 29 | 30 | select distinct 31 | principal_email, 32 | project_id, 33 | regexp_extract(principal_email, '([^@]+)') as username, 34 | case 35 | when principal_email like '%.iam.gserviceaccount.com' then 'Service Account' 36 | else 'User' 37 | end as user_type, 38 | {{ generate_surrogate_key([ 39 | 'principal_email', 'project_id' 40 | ]) }} as user_key 41 | from user_emails 42 | -------------------------------------------------------------------------------- /models/marts/dim_error_messages.sql: -------------------------------------------------------------------------------- 1 | {# error messages and code, fill with "NONE" if no error message #} 2 | {{ 3 | config( 4 | unique_key = 'error_message_key', 5 | cluster_by = 'error_message_key', 6 | materialized='incremental' 7 | 8 | ) }} 9 | 10 | 11 | with data_access as ( 12 | 13 | select * 14 | from {{ ref('stg_bigquery_audit_log__data_access') }} 15 | where 1=1 16 | {% if is_incremental() %} 17 | and date(event_timestamp) >= current_date - 3 18 | {% endif %} 19 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 20 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 21 | {% endif %} 22 | 23 | ), 24 | 25 | error_messages as ( 26 | 27 | select distinct 28 | coalesce(error_result_code, 'NONE') as error_result_code, 29 | coalesce(error_result_message, 'NONE') as error_result_message 30 | from data_access 31 | 32 | ), 33 | 34 | final as ( 35 | 36 | select 37 | {{ generate_surrogate_key([ 38 | 'error_result_code' 39 | , 'error_result_message' 40 | ]) }} as error_message_key, 41 | * 42 | from error_messages 43 | 44 | ) 45 | 46 | select * 47 | from final 48 | -------------------------------------------------------------------------------- /models/marts/dim_job.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = ['job_key', 'caller_ip_address'], 4 | cluster_by = ['job_key', 'grafana_dashboard_id', 'grafana_panel_id'], 5 | materialized = 'incremental' 6 | ) }} 7 | 8 | with data_access as ( 9 | 10 | select * 11 | from {{ ref('stg_bigquery_audit_log__data_access') }} 12 | where 1=1 13 | {% if is_incremental() %} 14 | and date(event_timestamp) >= current_date - 3 15 | {% endif %} 16 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 17 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 18 | {% endif %} 19 | 20 | ), 21 | 22 | extract_json as( 23 | select 24 | *, 25 | replace(replace(regexp_extract(query_statement, r'^(\/\* \{+?[\w\W]+?\} \*\/)'), '/', ''), '*', '') dbt_info 26 | from data_access 27 | ), 28 | 29 | dbt_statements as( 30 | select 31 | *, 32 | json_extract_scalar(dbt_info, '$.dbt_version') as dbt_version, 33 | json_extract_scalar(dbt_info, '$.profile_name') as dbt_profile_name, 34 | json_extract_scalar(dbt_info, '$.target_name') as dbt_target_name, 35 | json_extract_scalar(dbt_info, '$.node_id') as dbt_model_name 36 | from extract_json 37 | ), 38 | 39 | add_dbt_context as( 40 | select 41 | *, 42 | case 43 | when dbt_model_name like 'model.%' then 'DBT_RUN' 44 | when dbt_model_name like 'snapshot.%' then 'DBT_SNAPSHOT' 45 | when dbt_model_name like 'test.%' then 'DBT_TEST' 46 | end as dbt_execution_type 47 | from dbt_statements 48 | ), 49 | 50 | adjust_modelname as( 51 | select 52 | *, 53 | concat(split(dbt_model_name, '.')[safe_offset(1)], '.',split(dbt_model_name, '.')[safe_offset(2)]) as dbt_adjusted_model_name 54 | from add_dbt_context 55 | ), 56 | 57 | final as ( 58 | 59 | select distinct 60 | grafana_dashboard_id, 61 | grafana_panel_id, 62 | job_id, 63 | project_id, 64 | event_type, 65 | resource_name, 66 | caller_ip_address, 67 | method_name, 68 | create_disposition, 69 | statement_type, 70 | query_priority, 71 | dbt_version, 72 | dbt_profile_name, 73 | dbt_target_name, 74 | dbt_execution_type, 75 | dbt_adjusted_model_name as dbt_model_name, 76 | cache_hit 77 | from adjust_modelname 78 | ) 79 | 80 | select 81 | {{ generate_surrogate_key ([ 82 | 'job_id' 83 | ]) }} as job_key, 84 | * 85 | from final 86 | -------------------------------------------------------------------------------- /models/marts/dim_job_labels.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = ['job_key', 'label_key'], 4 | cluster_by = ['job_key', 'label_key'], 5 | materialized = 'incremental' 6 | ) 7 | }} 8 | 9 | with source as ( 10 | 11 | select * 12 | from {{ ref('stg_bigquery_audit_log__data_access') }} 13 | where 1=1 14 | {% if is_incremental() %} 15 | and date(event_timestamp) >= current_date - 3 16 | {% endif %} 17 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 18 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 19 | {% endif %} 20 | 21 | ), 22 | 23 | split_labels as ( 24 | 25 | select 26 | job_id, 27 | split(regexp_replace(config_labels, r'[{}"]', ""), ",") as split_config 28 | from source 29 | 30 | ), 31 | 32 | unnested as ( 33 | 34 | select 35 | job_id, 36 | sconfig 37 | from split_labels 38 | cross join unnest(split_config) as sconfig 39 | 40 | ), 41 | 42 | final as ( 43 | 44 | select 45 | job_id, 46 | split(sconfig, ":")[offset(0)] as label_key, 47 | split(sconfig, ":")[offset(1)] as label_value 48 | from unnested 49 | 50 | ) 51 | 52 | select distinct 53 | {{ 54 | generate_surrogate_key ([ 55 | 'job_id' 56 | ]) }} as job_key, 57 | label_key, 58 | label_value 59 | from final 60 | -------------------------------------------------------------------------------- /models/marts/dim_job_table_view_references.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = ['job_key', 'referenced_view_or_table', 'layer_used', 'object_type'], 4 | cluster_by = ['job_key', 'referenced_view_or_table'], 5 | materialized = 'incremental' 6 | ) 7 | }} 8 | 9 | with source as ( 10 | select * 11 | from {{ ref('stg_bigquery_audit_log__data_access') }} 12 | where 1=1 13 | {% if is_incremental() %} 14 | and date(event_timestamp) >= current_date - 3 15 | {% endif %} 16 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 17 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 18 | {% endif %} 19 | 20 | ), 21 | 22 | views as ( 23 | select distinct 24 | job_id, 25 | referenced_view as referenced_view_or_table, 26 | "view" as object_type 27 | from source 28 | cross join unnest(referenced_views) as referenced_view 29 | ), 30 | 31 | tables as ( 32 | select distinct 33 | job_id, 34 | referenced_table as referenced_view_or_table, 35 | "table" as object_type 36 | from source 37 | cross join unnest(referenced_tables) as referenced_table 38 | ), 39 | 40 | unioned as ( 41 | select * 42 | from views 43 | 44 | union all 45 | 46 | select * 47 | from tables 48 | ), 49 | 50 | object_split as ( 51 | select *, 52 | split(referenced_view_or_table, '/')[safe_offset(1)] as project_id, 53 | split(referenced_view_or_table, '/')[safe_offset(3)] as dataset_id, 54 | split(referenced_view_or_table, '/')[safe_offset(5)] as table_or_view_id 55 | from unioned 56 | ), 57 | 58 | add_layer as( 59 | select *, 60 | concat(project_id, '.', dataset_id, '.', table_or_view_id) as qualified_table_name, 61 | {{ determine_data_layer('dataset_id') }} as layer_used 62 | from object_split 63 | ) 64 | 65 | select distinct 66 | {{ generate_surrogate_key([ 67 | 'job_id' 68 | ]) }} as job_key, 69 | * except(job_id) 70 | from add_layer 71 | -------------------------------------------------------------------------------- /models/marts/dim_query_statements.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = ['job_key'], 4 | cluster_by = ['job_key', 'query_statement'], 5 | materialized = 'incremental' 6 | ) }} 7 | 8 | with data_access as ( 9 | 10 | select * 11 | from {{ ref('stg_bigquery_audit_log__data_access') }} 12 | where 1=1 13 | {% if is_incremental() %} 14 | and date(event_timestamp) >= current_date - 3 15 | {% endif %} 16 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 17 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 18 | {% endif %} 19 | 20 | ), 21 | 22 | extract_json as( 23 | select 24 | *, 25 | replace(replace(regexp_extract(query_statement, r'^(\/\* \{+?[\w\W]+?\} \*\/)'), '/', ''), '*', '') dbt_info 26 | from data_access 27 | ), 28 | 29 | final as ( 30 | select distinct 31 | query_statement, 32 | job_id, 33 | dbt_info, 34 | from extract_json 35 | ) 36 | 37 | select 38 | {{ generate_surrogate_key ([ 39 | 'job_id', 40 | ])}} as job_key, 41 | * 42 | from final -------------------------------------------------------------------------------- /models/marts/dim_user_agents.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key = ['user_agent_key', 'client_type'], 4 | cluster_by = ['user_agent_key', 'client_type'], 5 | materialized = 'incremental' 6 | ) 7 | }} 8 | 9 | with source as( 10 | select * 11 | from {{ ref('stg_bigquery_audit_log__data_access') }} 12 | where 1=1 13 | {% if is_incremental() %} 14 | and date(event_timestamp) >= current_date - 3 15 | {% endif %} 16 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 17 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 18 | {% endif %} 19 | 20 | ), 21 | 22 | final as( 23 | select distinct 24 | caller_supplied_user_agent, 25 | principal_email, 26 | project_id, 27 | case 28 | {% for custom_client in var('leaner_query_custom_clients') %} 29 | {{ build_client_type(custom_client) }} 30 | {% endfor %} 31 | 32 | when json_extract_scalar(config_labels, '$.sheets_trigger') = 'user' 33 | then 'Connected Sheet - User Initiated' 34 | when json_extract_scalar(config_labels, '$.sheets_trigger') = 'schedule' 35 | then 'Connected Sheet - Scheduled' 36 | when json_extract_scalar(config_labels, '$.data_source_id') = 'scheduled_query' 37 | then 'Scheduled Query' 38 | when caller_supplied_user_agent like 'Mozilla%' then 'Web console' 39 | when json_extract_scalar(config_labels, '$.dbt_invocation_id') is not null 40 | or caller_supplied_user_agent like 'dbt%' then 'dbt run' 41 | 42 | when caller_supplied_user_agent like 'gl-python%' then 'Python Client' 43 | when caller_supplied_user_agent like 'Fivetran%' then 'Fivetran' 44 | when caller_supplied_user_agent like 'Hightouch%' then 'Hightouch' 45 | when caller_supplied_user_agent like 'gcloud-golang-bigquery%' 46 | and principal_email like 'rudderstack%' 47 | then 'Rudderstack' 48 | when caller_supplied_user_agent like 'gcloud-golang%' 49 | or caller_supplied_user_agent like 'google-api-go%' 50 | then 'Golang Client' 51 | when caller_supplied_user_agent like 'gcloud-node%' then 'Node Client' 52 | when caller_supplied_user_agent like 'SimbaJDBCDriver%' then 'Java Client' 53 | when caller_supplied_user_agent like 'google-cloud-sdk%' 54 | then 'Google Cloud SDK' 55 | 56 | else coalesce(caller_supplied_user_agent, 'Unknown') 57 | end as client_type 58 | from source 59 | ) 60 | 61 | select 62 | {{ generate_surrogate_key(['caller_supplied_user_agent', 'principal_email', 'project_id']) }} as user_agent_key, 63 | * 64 | from final 65 | -------------------------------------------------------------------------------- /models/marts/fct_executed_statements.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | cluster_by = ['job_key', 'user_key', 'user_agent_key'], 11 | partition_by={ 12 | "field": "statement_date", 13 | "data_type": "date", 14 | "granularity": "day" 15 | }, 16 | materialized = 'incremental', 17 | partitions = partitions_to_replace, 18 | incremental_strategy = 'insert_overwrite' 19 | ) }} 20 | 21 | with source as ( 22 | 23 | select * 24 | from {{ ref('stg_bigquery_audit_log__data_access') }} 25 | where 1=1 26 | {% if is_incremental() %} 27 | and date(event_timestamp) >= current_date - 3 28 | {% endif %} 29 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 30 | and date(event_timestamp) >= current_date - {{ var('leaner_query_dev_limit_days') }} 31 | {% endif %} 32 | 33 | ), 34 | 35 | final as ( 36 | 37 | select distinct 38 | start_time, 39 | date(start_time) as statement_date, 40 | end_time, 41 | total_processed_bytes, 42 | total_billed_bytes, 43 | output_row_count, 44 | total_slot_ms, 45 | billing_tier, 46 | coalesce(error_result_code, 'NONE') as error_result_code, 47 | coalesce(error_result_message, 'NONE') as error_result_message, 48 | job_id, 49 | principal_email, 50 | project_id, 51 | caller_supplied_user_agent 52 | from source 53 | 54 | ) 55 | 56 | select 57 | {{ generate_surrogate_key ([ 58 | 'job_id' 59 | ]) }} as job_key, 60 | {{ generate_surrogate_key([ 61 | 'error_result_code' 62 | , 'error_result_message' 63 | ]) }} as error_message_key, 64 | {{ generate_surrogate_key([ 65 | 'principal_email', 66 | 'project_id' 67 | ]) }} as user_key, 68 | {{ generate_surrogate_key([ 69 | 'caller_supplied_user_agent' 70 | , 'principal_email' 71 | , 'project_id' 72 | ]) }} as user_agent_key, 73 | * except(error_result_code, 74 | error_result_message, 75 | job_id, 76 | principal_email, 77 | project_id, 78 | caller_supplied_user_agent) 79 | from final 80 | -------------------------------------------------------------------------------- /models/reports/reports.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: rpt_bigquery_user_metrics_daily 5 | description: > 6 | Aggregates queries run, errors, types of tables ran, cost, and execution 7 | time by each user, user_type, and project ID on a daily grain. 8 | 9 | - name: rpt_bigquery_usage_cost_daily 10 | description: > 11 | Aggregates queries run, cost, dbt build/test cost, and execution time 12 | by client_type and project ID on a daily grain. 13 | 14 | - name: rpt_bigquery_table_usage_daily 15 | description: > 16 | Incremental model aggregating the number of queries run split by 17 | user_type and execution_type at a grain of day, table, layer and client_type. 18 | 19 | - name: rpt_bigquery_query_alerting 20 | description: Model used for Grafana alerting on specific queries. 21 | 22 | - name: rpt_bigquery_dbt_metrics_daily 23 | description: > 24 | Aggregates dbt costs and build times for each table, target and model 25 | on a dialy grain. -------------------------------------------------------------------------------- /models/reports/rpt_bigquery_dbt_metrics_daily.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | enable = var("leaner_query_enable_reports"), 11 | require_partition_filter = var("leaner_query_require_partition_by_reports"), 12 | partition_by = { 13 | "field": "report_date", 14 | "data_type": "date", 15 | "granularity": "day" 16 | }, 17 | cluster_by = ['dbt_model_name'] 18 | ) 19 | }} 20 | 21 | with fct_executed_statements as ( 22 | select * 23 | from {{ ref('fct_executed_statements') }} 24 | where 1=1 25 | {% if is_incremental() %} 26 | and date(statement_date) in ({{ partitions_to_replace | join(',') }}) 27 | {% endif %} 28 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 29 | and date(statement_date) >= current_date - {{ var('leaner_query_dev_limit_days') }} 30 | {% endif %} 31 | ), 32 | 33 | min_event_date as ( 34 | select min(statement_date) as min_date 35 | from fct_executed_statements 36 | ), 37 | 38 | calendar as ( 39 | select date_day 40 | from {{ ref('dim_leaner_query_date') }} 41 | inner join min_event_date on date_day between min_date and current_date 42 | ), 43 | 44 | dim_job as( 45 | select * 46 | from {{ ref('dim_job') }} 47 | ), 48 | 49 | dim_job_table_view_references as ( 50 | select * 51 | from {{ ref('dim_job_table_view_references') }} 52 | ), 53 | 54 | dim_job_labels as( 55 | select * 56 | from {{ ref('dim_job_labels') }} 57 | ), 58 | 59 | aggregate_by_dbt_invocation as ( 60 | select 61 | calendar.date_day as report_date, -- calendar 62 | dim_job.dbt_model_name, 63 | dim_job_labels.label_value as dbt_invocation_id, 64 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_RUN"), 1,0)), 0) as dbt_builds, 65 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_TEST"),1,0)), 0) as dbt_tests, 66 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_SNAPSHOT"),1,0)), 0) as dbt_snapshots, 67 | 68 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_RUN"),{{ calc_bq_cost('total_billed_bytes') }},0)), 0) as estimated_dbt_run_cost_usd, 69 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_TEST"),{{ calc_bq_cost('total_billed_bytes') }},0)), 0) as estimated_dbt_test_cost_usd, 70 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_SNAPSHOT"),{{ calc_bq_cost('total_billed_bytes') }},0)), 0) as estimated_dbt_snapshot_cost_usd, 71 | 72 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_RUN"),(timestamp_diff(fct_executed_statements.end_time, fct_executed_statements.start_time, millisecond)),0)), 0) as estimated_dbt_build_time_ms, 73 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_TEST"), (timestamp_diff(fct_executed_statements.end_time, fct_executed_statements.start_time, millisecond)),0)), 0) as estimated_dbt_test_time_ms, 74 | coalesce(sum(if((dim_job.dbt_execution_type = "DBT_SNAPSHOT"),(timestamp_diff(fct_executed_statements.end_time, fct_executed_statements.start_time, millisecond)),0)), 0) as estimated_dbt_snapshot_time_ms, 75 | from calendar 76 | inner join fct_executed_statements 77 | on calendar.date_day = fct_executed_statements.statement_date 78 | inner join dim_job_labels 79 | on fct_executed_statements.job_key = dim_job_labels.job_key 80 | inner join dim_job 81 | on fct_executed_statements.job_key = dim_job.job_key 82 | where dim_job.dbt_execution_type is not null 83 | and label_key = 'dbt_invocation_id' 84 | group by 1,2,3 85 | ), 86 | 87 | aggregates as( 88 | select 89 | report_date, 90 | dbt_model_name, 91 | coalesce(sum(if(dbt_builds > 0, 1,0)),0) as total_dbt_builds, 92 | coalesce(sum(if(dbt_tests > 0, 1,0)),0) as total_dbt_tests, 93 | coalesce(sum(if(dbt_snapshots > 0, 1,0)),0) as total_dbt_snapshots, 94 | 95 | coalesce(sum(estimated_dbt_run_cost_usd), 0) as total_estimated_dbt_run_cost_usd, 96 | coalesce(sum(estimated_dbt_test_cost_usd), 0) as total_estimated_dbt_test_cost_usd, 97 | coalesce(sum(estimated_dbt_snapshot_cost_usd), 0) as total_estimated_dbt_snapshot_cost_usd, 98 | 99 | coalesce(sum(estimated_dbt_build_time_ms), 0) as total_estimated_dbt_build_time_ms, 100 | coalesce(sum(estimated_dbt_test_time_ms), 0) as total_estimated_dbt_test_time_ms, 101 | coalesce(sum(estimated_dbt_snapshot_time_ms), 0) as total_estimated_dbt_snapshot_time_ms 102 | from aggregate_by_dbt_invocation 103 | group by 1,2 104 | ), 105 | 106 | final as( 107 | select 108 | *, 109 | round((coalesce(safe_divide(total_estimated_dbt_run_cost_usd, total_dbt_builds), 0.0)), 3) as average_build_cost, 110 | round((coalesce(safe_divide(total_estimated_dbt_test_cost_usd, total_dbt_tests), 0.0)), 3) as average_test_cost, 111 | round((coalesce(safe_divide(total_estimated_dbt_snapshot_cost_usd, total_dbt_snapshots), 0.0)), 3) as average_snapshot_cost, 112 | 113 | round((coalesce(safe_divide(total_estimated_dbt_build_time_ms, total_dbt_builds), 0.0)), 3) as average_build_time_ms, 114 | round((coalesce(safe_divide(total_estimated_dbt_test_time_ms, total_dbt_tests), 0.0)), 3) as average_test_time_ms, 115 | round((coalesce(safe_divide(total_estimated_dbt_snapshot_time_ms, total_dbt_snapshots), 0.0)), 3) as average_snapshot_time_ms 116 | 117 | from aggregates 118 | ) 119 | 120 | select * 121 | from final 122 | -------------------------------------------------------------------------------- /models/reports/rpt_bigquery_query_alerting.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'timestamp(timestamp_add(current_timestamp, interval -3 day))', 3 | 'timestamp(timestamp_add(current_timestamp, interval -2 day))', 4 | 'timestamp(timestamp_add(current_timestamp, interval -1 day))', 5 | 'timestamp(current_timestamp)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | enable = var("leaner_query_enable_reports"), 11 | require_partition_filter = var("leaner_query_require_partition_by_reports"), 12 | materialized='incremental', 13 | cluster_by = ['username', 'user_type'], 14 | partition_by={ 15 | "field": "start_time", 16 | "data_type": "timestamp", 17 | "granularity": "day" 18 | }, 19 | partitions = partitions_to_replace, 20 | incremental_strategy = 'insert_overwrite' 21 | ) 22 | }} 23 | 24 | with statements as ( 25 | select * 26 | from {{ ref('fct_executed_statements') }} 27 | where 1=1 28 | {% if is_incremental() %} 29 | and timestamp(statement_date) in ({{ partitions_to_replace | join(',') }}) 30 | {% endif %} 31 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 32 | and date(statement_date) >= current_date - {{ var('leaner_query_dev_limit_days') }} 33 | {% endif %} 34 | ), 35 | 36 | users as ( 37 | select * 38 | from {{ ref('dim_bigquery_users') }} 39 | ), 40 | 41 | query_statements as ( 42 | select * 43 | from {{ ref('dim_query_statements') }} 44 | ), 45 | 46 | final as ( 47 | select 48 | statements.start_time, 49 | users.username, 50 | users.user_type, 51 | {{calc_bq_cost('total_billed_bytes', 'total_slot_ms')}} as query_cost, 52 | query_statements.query_statement 53 | from statements 54 | inner join users 55 | on users.user_key = statements.user_key 56 | inner join query_statements 57 | on query_statements.job_key = statements.job_key 58 | where date(statements.start_time) >= current_date -2 59 | ) 60 | 61 | select * 62 | from final 63 | -------------------------------------------------------------------------------- /models/reports/rpt_bigquery_table_usage_daily.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | enable = var("leaner_query_enable_reports"), 11 | require_partition_filter = var("leaner_query_require_partition_by_reports"), 12 | materialized='incremental', 13 | cluster_by = ['layer', 'client_type'], 14 | partition_by={ 15 | "field": "report_date", 16 | "data_type": "date", 17 | "granularity": "day" 18 | }, 19 | partitions = partitions_to_replace, 20 | incremental_strategy = 'insert_overwrite' 21 | ) }} 22 | 23 | with fct_executed_statements as ( 24 | select * 25 | from {{ ref('fct_executed_statements') }} 26 | ), 27 | 28 | min_event_date as ( 29 | select min(statement_date) as min_date 30 | from fct_executed_statements 31 | ), 32 | 33 | dim_job as ( 34 | select * 35 | from {{ ref('dim_job') }} 36 | ), 37 | 38 | dim_bq_users as ( 39 | select * 40 | from {{ ref('dim_bigquery_users') }} 41 | ), 42 | 43 | dim_user_agents as ( 44 | select * 45 | from {{ ref('dim_user_agents') }} 46 | ), 47 | 48 | calendar as ( 49 | select date_day 50 | from {{ ref('dim_leaner_query_date') }} 51 | inner join min_event_date on date_day between min_date and current_date 52 | 53 | where 1=1 54 | {% if is_incremental() %} 55 | and date_day in ({{ partitions_to_replace | join(',') }}) 56 | {% endif %} 57 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 58 | and date_day >= current_date - {{ var('leaner_query_dev_limit_days') }} 59 | {% endif %} 60 | ), 61 | 62 | dim_job_table_view_references as( 63 | select * 64 | from {{ ref('dim_job_table_view_references') }} 65 | ), 66 | 67 | unique_tables as ( 68 | select 69 | distinct referenced_view_or_table as table_name 70 | from dim_job_table_view_references 71 | ), 72 | 73 | threat_calculation as ( 74 | select * 75 | from {{ ref('int_threat_calculation') }} 76 | ), 77 | 78 | importance_calculation as ( 79 | select * 80 | from {{ ref('int_importance_calculations') }} 81 | ), 82 | 83 | aggregates as ( 84 | select 85 | calendar.date_day as report_date, -- calendar 86 | unique_tables.table_name, -- dim_job_table 87 | dim_job_table_view_references.project_id, 88 | dim_job_table_view_references.dataset_id, 89 | dim_job_table_view_references.table_or_view_id, 90 | dim_job_table_view_references.qualified_table_name, 91 | dim_job_table_view_references.layer_used as layer, -- dim_job_table 92 | dim_user_agents.client_type, -- dim_user_agents 93 | coalesce(count(fct_executed_statements.job_key), 0) as total_queries_run, -- cnt(job_key) from fct_executed_statements 94 | count(distinct if(dim_job.dbt_execution_type = "DBT_RUN", dim_job.dbt_model_name, null)) as dbt_models_run, 95 | count(distinct if(dim_job.dbt_execution_type = "DBT_TEST", dim_job.dbt_model_name, null)) as dbt_tests_run, 96 | count(distinct if(dim_bq_users.user_type = "User", fct_executed_statements.user_key, null)) as total_human_users, 97 | count(distinct if(dim_bq_users.user_type = "Service Account", fct_executed_statements.user_key, null)) as total_service_accounts, 98 | coalesce(sum(case when fct_executed_statements.error_message_key !={{ generate_surrogate_key(["'NONE'", "'NONE'"]) }} then 1 else 0 end), 0) as total_errors, 99 | from calendar 100 | cross join unique_tables 101 | left outer join dim_job_table_view_references 102 | on unique_tables.table_name = dim_job_table_view_references.referenced_view_or_table 103 | left outer join fct_executed_statements 104 | on dim_job_table_view_references.job_key = fct_executed_statements.job_key 105 | and calendar.date_day = fct_executed_statements.statement_date 106 | left outer join dim_user_agents 107 | on fct_executed_statements.user_agent_key = dim_user_agents.user_agent_key 108 | left outer join dim_bq_users 109 | on fct_executed_statements.user_key = dim_bq_users.user_key 110 | left outer join dim_job 111 | on fct_executed_statements.job_key = dim_job.job_key 112 | group by 1, 2, 3, 4, 5, 6, 7, 8 113 | ), 114 | 115 | final as ( 116 | select distinct 117 | aggregates.*, 118 | threat_calculation.threat_score, 119 | importance_calculation.importance_score, 120 | (importance_calculation.importance_score * {{ var('leaner_query_priority_importance_level_weight') }}) 121 | + (threat_calculation.threat_score * {{ var('leaner_query_priority_threat_level_weight') }}) 122 | as priority_score 123 | from aggregates 124 | left outer join threat_calculation 125 | on aggregates.table_name = threat_calculation.referenced_view_or_table 126 | and aggregates.report_date = threat_calculation.statement_date 127 | left outer join importance_calculation 128 | on aggregates.table_name = importance_calculation.table_name 129 | and aggregates.report_date = importance_calculation.score_date 130 | ) 131 | 132 | select * 133 | from final 134 | -------------------------------------------------------------------------------- /models/reports/rpt_bigquery_usage_cost_daily.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | enable = var("leaner_query_enable_reports"), 11 | require_partition_filter = var("leaner_query_require_partition_by_reports"), 12 | partition_by = { 13 | "field": "report_date", 14 | "data_type": "date", 15 | "granularity": "day" 16 | }, 17 | cluster_by = ['client_type'] 18 | ) 19 | }} 20 | 21 | with fct_executed_statements as ( 22 | select * 23 | from {{ ref('fct_executed_statements') }} 24 | ), 25 | 26 | min_event_date as ( 27 | select min(statement_date) as min_date 28 | from fct_executed_statements 29 | ), 30 | 31 | dim_user_agents as ( 32 | select * 33 | from {{ ref('dim_user_agents') }} 34 | ), 35 | 36 | calendar as ( 37 | select date_day 38 | from {{ ref('dim_leaner_query_date') }} 39 | inner join min_event_date on date_day between min_date and current_date 40 | where 1=1 41 | {% if is_incremental() %} 42 | and date_day in ({{ partitions_to_replace | join(',') }}) 43 | {% endif %} 44 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 45 | and date_day >= current_date - {{ var('leaner_query_dev_limit_days') }} 46 | {% endif %} 47 | 48 | ), 49 | 50 | dim_job as( 51 | select * 52 | from {{ ref('dim_job') }} 53 | ), 54 | 55 | aggregates as ( 56 | select 57 | calendar.date_day as report_date, -- calendar 58 | dim_user_agents.client_type, -- dim_user_agents 59 | dim_user_agents.project_id, -- BQ project name 60 | coalesce(count(fct_executed_statements.job_key), 0) as total_queries_run, -- cnt(job_key) from fct_executed_statements 61 | coalesce(sum({{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }}), 0) as total_estimated_cost_usd, 62 | coalesce(sum(if(dim_job.dbt_execution_type = "DBT_RUN",{{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }},0)), 0) as total_estimated_dbt_run_build_cost_usd, 63 | coalesce(sum(if(dim_job.dbt_execution_type = "DBT_TEST",{{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }},0)), 0) as total_estimated_dbt_test_build_cost_usd, 64 | coalesce(sum(fct_executed_statements.total_slot_ms), 0) as total_time_ms, 65 | coalesce(sum(if(lower(dim_job.statement_type) = "select",{{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }},0)), 0) as total_estimated_query_cost_usd, 66 | coalesce(sum(if(lower(dim_job.statement_type) = "select", fct_executed_statements.total_slot_ms, 0)), 0) as total_query_time_ms 67 | from calendar 68 | cross join dim_user_agents 69 | left outer join fct_executed_statements 70 | on dim_user_agents.user_agent_key = fct_executed_statements.user_agent_key 71 | and calendar.date_day = fct_executed_statements.statement_date 72 | left outer join dim_job 73 | on fct_executed_statements.job_key = dim_job.job_key 74 | group by 1, 2, 3 75 | ) 76 | 77 | select * 78 | from aggregates 79 | -------------------------------------------------------------------------------- /models/reports/rpt_bigquery_user_metrics_daily.sql: -------------------------------------------------------------------------------- 1 | {% set partitions_to_replace = [ 2 | 'date(date_add(current_date, interval -3 day))', 3 | 'date(date_add(current_date, interval -2 day))', 4 | 'date(date_add(current_date, interval -1 day))', 5 | 'date(current_date)' 6 | ] %} 7 | 8 | {{ 9 | config( 10 | enable = var("leaner_query_enable_reports"), 11 | require_partition_filter = var("leaner_query_require_partition_by_reports"), 12 | cluster_by = ['username', 'user_type'], 13 | partition_by={ 14 | "field": "report_date", 15 | "data_type": "date", 16 | "granularity": "day" 17 | }, 18 | ) }} 19 | 20 | with fct_executed_statements as ( 21 | select * 22 | from {{ ref('fct_executed_statements') }} 23 | ), 24 | 25 | min_event_date as( 26 | select min(statement_date) as min_date 27 | from fct_executed_statements 28 | ), 29 | 30 | calendar as ( 31 | select date_day 32 | from {{ ref('dim_leaner_query_date') }} 33 | inner join min_event_date on date_day between min_date and current_date 34 | where 1=1 35 | {% if is_incremental() %} 36 | and date_day in ({{ partitions_to_replace | join(',') }}) 37 | {% endif %} 38 | {% if target.name in var('leaner_query_dev_target_name') and var('leaner_query_enable_dev_limits') %} 39 | and date_day >= current_date - {{ var('leaner_query_dev_limit_days') }} 40 | {% endif %} 41 | 42 | ), 43 | 44 | dim_bq_users as( 45 | select * 46 | from {{ ref('dim_bigquery_users') }} 47 | ), 48 | 49 | dim_job as( 50 | select * 51 | from {{ ref('dim_job') }} 52 | ), 53 | 54 | dim_job_table_view_references as( 55 | select * 56 | from {{ ref('dim_job_table_view_references') }} 57 | ), 58 | 59 | tables_used_per_job as ( 60 | select 61 | fct_executed_statements.job_key, 62 | sum(case when dim_job_table_view_references.layer_used = 'prod' then 1 else 0 end) as prod_tables_used, 63 | sum(case when dim_job_table_view_references.layer_used = 'stage' then 1 else 0 end) as stage_tables_used, 64 | sum(case when dim_job_table_view_references.layer_used = 'raw' then 1 else 0 end) as raw_tables_used, 65 | coalesce(count(fct_executed_statements.job_key), 0) as total_tables_used 66 | from fct_executed_statements 67 | inner join dim_job_table_view_references on fct_executed_statements.job_key = dim_job_table_view_references.job_key 68 | group by 1 69 | ), 70 | 71 | aggregates as( 72 | select 73 | calendar.date_day as report_date, 74 | dim_bq_users.username, 75 | dim_bq_users.user_type, 76 | dim_bq_users.project_id, 77 | coalesce(count(fct_executed_statements.job_key), 0) as total_queries_run, 78 | coalesce(sum(case when fct_executed_statements.error_message_key !={{ generate_surrogate_key(["'NONE'", "'NONE'"]) }} then 1 else 0 end), 0) as total_errors, 79 | coalesce(sum(prod_tables_used), 0) as total_prod_tables_used, 80 | coalesce(sum(stage_tables_used), 0) as total_stage_tables_used, 81 | coalesce(sum(raw_tables_used), 0) as total_raw_tables_used, 82 | coalesce(sum(total_tables_used), 0) as total_tables_used, 83 | coalesce(sum({{ calc_bq_cost('total_billed_bytes', 'total_slot_ms') }}), 0) as total_estimated_cost_usd, 84 | coalesce(sum(total_slot_ms), 0) as total_time_ms 85 | from calendar 86 | cross join dim_bq_users 87 | left outer join fct_executed_statements on fct_executed_statements.user_key = dim_bq_users.user_key and fct_executed_statements.statement_date = calendar.date_day 88 | left outer join dim_job on fct_executed_statements.job_key = dim_job.job_key and lower(dim_job.statement_type) = 'select' 89 | left outer join tables_used_per_job on fct_executed_statements.job_key = tables_used_per_job.job_key 90 | group by 1, 2, 3, 4 91 | ), 92 | 93 | final as( 94 | select 95 | *, 96 | round((coalesce(safe_divide(total_errors, total_queries_run), 0.0) * 100), 3) as error_rate, 97 | round((coalesce(safe_divide(total_prod_tables_used, total_tables_used), 0.0) * 100), 3) as prod_table_use_rate, 98 | round((coalesce(safe_divide(total_stage_tables_used, total_tables_used), 0.0) * 100), 3) as stage_table_use_rate, 99 | round((coalesce(safe_divide(total_raw_tables_used, total_tables_used), 0.0) * 100), 3) as raw_table_use_rate 100 | from aggregates 101 | ) 102 | 103 | select * 104 | from final 105 | -------------------------------------------------------------------------------- /models/staging/bigquery_audit_log/stg_bigquery_audit_log.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_bigquery_audit_log__data_access 5 | description: > 6 | Table capturing bigquery data access and usage information. This table allows us to determine how expensive (in dollars and bytes) 7 | jobs are within BigQuery as well as determine usage metrics at the project, dataset, table, and field level. 8 | columns: 9 | - name: insert_id 10 | description: Identifier for an event within the BQ logs 11 | 12 | - name: job_id 13 | description: > 14 | Identifier for the BQ job being logged. Not all statements are Jobs so this field may be null. The values are taken 15 | from the job_name sub_field within the metadataJson field; composing the second and fourth item when splitting by '/' 16 | 17 | - name: event_type 18 | description: > 19 | Identifies the type of BiqQuery event captured. Can be one of 'JobChangeEvent', 'TableDataReadEvent', 'OtherEvent'. 20 | Only 'JobChangeEvent' and 'TableDataReadEvent' are modeled further 21 | 22 | - name: event_timestamp 23 | description: When the event was logged 24 | 25 | - name: resource_name 26 | description: Fully qualified URI name that the event was logged against 27 | 28 | - name: principal_email 29 | description: The user (individual or service account) that ran the statement being logged 30 | 31 | - name: project_id 32 | description: project_id that the event ran against 33 | 34 | - name: dataset_id 35 | description: dataset that the event ran against 36 | 37 | - name: location 38 | description: location that the event ran for 39 | 40 | - name: caller_ip_address 41 | description: ip address of the client issuing the event 42 | 43 | - name: caller_supplied_user_agent 44 | description: The user agent of the caller, which is captured by the google sdk or api. Helpful in determining how statements are being issued. 45 | 46 | - name: service_name 47 | description: The name of the API service performing the operation. 48 | 49 | - name: method_name 50 | description: The name of the service method or operation. 51 | 52 | - name: metadata_json 53 | description: Contains the JSON representation of the event message 54 | 55 | - name: grafana_dashboard_id 56 | description: The id of the grafana dashboard where the bigquery job was run 57 | 58 | - name: grafana_panel_id 59 | description: The id of the grafana panel where the bigquery job was run -------------------------------------------------------------------------------- /models/staging/bigquery_audit_log/stg_bigquery_audit_log__data_access.sql: -------------------------------------------------------------------------------- 1 | with unioned as ( 2 | {%- for audit_log_source in var('leaner_query_sources') %} 3 | select 4 | insertId as insert_id, 5 | json_query(protopayload_auditlog.metadataJson, "$.jobChange.job.jobConfig.labels.x-dashboard-uid") as grafana_dashboard_id, 6 | json_query(protopayload_auditlog.metadataJson, "$.jobChange.job.jobConfig.labels.x-panel-id") as grafana_panel_id, 7 | coalesce( 8 | concat( 9 | split( 10 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobName'), "/" 11 | )[safe_offset(1)], 12 | ":", 13 | split( 14 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobName'), "/" 15 | )[safe_offset(3)] 16 | ) 17 | ) as job_id, 18 | case 19 | when json_query(protopayload_auditlog.metadataJson, '$.jobChange') is not null then 'JobChangeEvent' 20 | end as event_type, 21 | timestamp as event_timestamp, 22 | protopayload_auditlog.resourceName as resource_name, 23 | coalesce(protopayload_auditlog.authenticationInfo.principalEmail, 'UNKNOWN') as principal_email, 24 | resource.labels.project_id as project_id, 25 | resource.labels.dataset_id as dataset_id, 26 | resource.labels.location as location, 27 | protopayload_auditlog.requestMetadata.callerIp as caller_ip_address, 28 | protopayload_auditlog.requestMetadata.callerSuppliedUserAgent as caller_supplied_user_agent, 29 | protopayload_auditlog.serviceName as service_name, 30 | protopayload_auditlog.methodName as method_name, 31 | -- -- protopayload_auditlog.metadataJson as protopayload_auditlog.metadataJson, 32 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobStatus.jobState') as job_state, 33 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.type') as config_type, 34 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobStatus.errorResult.code') as error_result_code, 35 | json_value(protopayload_auditlog.metadataJson, 36 | '$.jobChange.job.jobStatus.errorResult.message') as error_result_message, 37 | timestamp(json_value(protopayload_auditlog.metadataJson, 38 | '$.jobChange.job.jobStats.createTime')) as create_time, 39 | timestamp(json_value(protopayload_auditlog.metadataJson, 40 | '$.jobChange.job.jobStats.startTime')) as start_time, 41 | timestamp(json_value(protopayload_auditlog.metadataJson, 42 | '$.jobChange.job.jobStats.endTime')) as end_time, 43 | safe_cast(json_value(protopayload_auditlog.metadataJson, 44 | '$.jobChange.job.jobStats.totalSlotMs') as int64) as total_slot_ms, 45 | json_value(protopayload_auditlog.metadataJson, 46 | '$.jobChange.job.jobStats.queryStats.totalProcessedBytes') as total_processed_bytes, 47 | safe_cast(json_value(protopayload_auditlog.metadataJson, 48 | '$.jobChange.job.jobStats.queryStats.totalBilledBytes') 49 | as int64) as total_billed_bytes, 50 | json_value(protopayload_auditlog.metadataJson, 51 | '$.jobChange.job.jobStats.queryStats.billingTier') as billing_tier, 52 | split(trim(trim(json_query(protopayload_auditlog.metadataJson, 53 | '$.jobChange.job.jobStats.queryStats.referencedTables'), 54 | '["'), '"]'), '","') as referenced_tables, 55 | split(trim(trim(json_query(protopayload_auditlog.metadataJson, 56 | '$.jobChange.job.jobStats.queryStats.referencedViews'), 57 | '["'), '"]'), '","') as referenced_views, 58 | json_value(protopayload_auditlog.metadataJson, 59 | '$.jobChange.job.jobStats.queryStats.outputRowCount') as output_row_count, 60 | safe_cast(json_value(protopayload_auditlog.metadataJson, 61 | '$.jobChange.job.jobStats.queryStats.cacheHit') as bool) as cache_hit, 62 | safe_cast(json_value(protopayload_auditlog.metadataJson, 63 | '$.jobChange.job.jobStats.loadStats.totalOutputBytes') 64 | as int64) as total_output_bytes, 65 | json_query(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.labels') as config_labels, 66 | json_value(protopayload_auditlog.metadataJson, 67 | '$.jobChange.job.jobConfig.queryConfig.query') as query_statement, 68 | safe_cast(json_value(protopayload_auditlog.metadataJson, 69 | '$.jobChange.job.jobConfig.queryConfig.queryTruncated') 70 | as bool) as is_query_truncated, 71 | coalesce(json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.queryConfig.destinationTable'), 72 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.loadConfig.destinationTable') 73 | ) as destination_table, 74 | coalesce(json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.queryConfig.createDisposition'), 75 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.loadConfig.createDisposition')) 76 | as create_disposition, 77 | coalesce(json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.queryConfig.writeDisposition'), 78 | json_value(protopayload_auditlog.metadataJson, '$.jobChange.job.jobConfig.loadConfig.writeDisposition')) 79 | as write_disposition, 80 | json_value(protopayload_auditlog.metadataJson, 81 | '$.jobChange.job.jobConfig.queryConfig.defaultDataset') as default_dataset, 82 | json_value(protopayload_auditlog.metadataJson, 83 | '$.jobChange.job.jobConfig.queryConfig.priority') as query_priority, 84 | json_value(protopayload_auditlog.metadataJson, 85 | '$.jobChange.job.jobConfig.queryConfig.statementType') as statement_type, 86 | from {{ audit_log_source }} 87 | where json_query(protopayload_auditlog.metadataJson, '$.jobChange') is not null 88 | {% if not loop.last -%} 89 | union all 90 | {%- endif -%} 91 | {%- endfor %} 92 | ) 93 | 94 | select * 95 | from unioned -------------------------------------------------------------------------------- /models/utils/dim_leaner_query_date.sql: -------------------------------------------------------------------------------- 1 | -- Heavily leveraged from the gitlab analytics team's date dimension: 2 | -- https://discourse.getdbt.com/t/building-a-calendar-table-using-dbt/325 3 | -- https://gitlab.com/gitlab-data/analytics/-/blob/master/transform/snowflake-dbt/models/sources/date/date_details_source.sql 4 | 5 | WITH date_spine AS ( 6 | 7 | SELECT * 8 | FROM 9 | UNNEST(GENERATE_DATE_ARRAY('2010-01-01', '2050-01-01', INTERVAL 1 DAY)) AS date_day 10 | 11 | ), 12 | 13 | formatted_dates AS ( 14 | 15 | SELECT 16 | date_day, 17 | date_day AS date_actual, 18 | CAST( 19 | date_day AS timestamp 20 | ) AS start_of_day_timestamp, 21 | TIMESTAMP_SUB(CAST(DATE_ADD(date_day, INTERVAL 1 DAY) AS timestamp), INTERVAL 1 SECOND) AS end_of_day_timestamp, 22 | FORMAT_DATE('%A', date_day) AS day_name, 23 | FORMAT_DATE('%B', date_day) AS month_name, 24 | FORMAT_DATE( 25 | '%b', date_day 26 | ) AS abbreviated_month_name, 27 | FORMAT_DATE( 28 | "%b'%g", date_day 29 | ) AS formatted_month_year_name, 30 | 31 | EXTRACT(MONTH FROM date_day) AS month_actual, 32 | EXTRACT(YEAR FROM date_day) AS year_actual, 33 | EXTRACT(QUARTER FROM date_day) AS quarter_actual, 34 | EXTRACT(DAY FROM date_day) AS day_of_month, 35 | EXTRACT(DAYOFWEEK FROM date_day) AS day_of_week, 36 | 37 | DATE_TRUNC(date_day, MONTH) AS first_day_of_month, 38 | DATE_TRUNC(date_day, WEEK) AS first_day_of_week, 39 | EXTRACT(WEEK FROM date_day) + 1 AS week_of_year, 40 | 41 | LAST_DAY(date_day, MONTH) AS last_day_of_month, 42 | LAST_DAY(date_day, YEAR) AS last_day_of_year, 43 | LAST_DAY(date_day, QUARTER) AS last_day_of_quarter, 44 | 45 | CASE WHEN EXTRACT(MONTH FROM date_day) = 1 THEN 12 46 | ELSE EXTRACT(MONTH FROM date_day) - 1 END AS month_of_fiscal_year, 47 | 48 | SAFE_CAST(CASE WHEN EXTRACT(MONTH FROM date_day) = 1 AND EXTRACT(DAY FROM date_day) = 1 THEN 'New Years Day' 49 | WHEN EXTRACT(MONTH FROM date_day) = 12 AND EXTRACT(DAY FROM date_day) = 25 THEN 'Christmas Day' 50 | WHEN EXTRACT(MONTH FROM date_day) = 12 AND EXTRACT(DAY FROM date_day) = 26 THEN 'Boxing Day' 51 | ELSE NULL END AS STRING) AS holiday_desc 52 | FROM date_spine 53 | 54 | ), 55 | 56 | dates_with_fiscal_calcs AS( 57 | SELECT 58 | *, 59 | -- timestamp used for joins to snapshots or anything with valid_to / valid_from 60 | -- creates a timestamp that is the last second of each day, unless it is the current date 61 | -- if current date, use current_timestamp 62 | -- if future date, leave null 63 | CASE 64 | WHEN date_day < CURRENT_DATE THEN end_of_day_timestamp 65 | WHEN date_day = CURRENT_DATE THEN CURRENT_TIMESTAMP 66 | END AS snapshot_join_timestamp, 67 | CASE WHEN month_actual < 2 68 | THEN year_actual 69 | ELSE (year_actual + 1) END AS fiscal_year, 70 | CASE WHEN month_actual < 2 THEN '4' 71 | WHEN month_actual < 5 THEN '1' 72 | WHEN month_actual < 8 THEN '2' 73 | WHEN month_actual < 11 THEN '3' 74 | ELSE '4' END 75 | AS fiscal_quarter 76 | FROM formatted_dates 77 | ), 78 | 79 | dates_with_aggregates AS ( 80 | SELECT 81 | *, 82 | 83 | fiscal_year || '-' || EXTRACT( 84 | MONTH FROM date_day 85 | ) AS fiscal_month_name, 86 | ( 87 | fiscal_year || '-' || 'Q' || fiscal_quarter 88 | ) AS fiscal_quarter_name, 89 | ( 90 | first_day_of_week = date_day 91 | ) AS is_first_day_of_week, 92 | ( 93 | last_day_of_month = date_day 94 | ) AS is_last_day_of_month, 95 | (day_of_week BETWEEN 2 AND 6) AS is_weekday, 96 | 97 | 98 | ROW_NUMBER() OVER (PARTITION BY year_actual, quarter_actual ORDER BY date_day) AS day_of_quarter, 99 | ROW_NUMBER() OVER (PARTITION BY year_actual ORDER BY date_day) AS day_of_year, 100 | 101 | ROW_NUMBER() OVER ( 102 | PARTITION BY fiscal_year, fiscal_quarter ORDER BY date_day 103 | ) AS day_of_fiscal_quarter, 104 | ROW_NUMBER() OVER ( 105 | PARTITION BY fiscal_year ORDER BY date_day 106 | ) AS day_of_fiscal_year, 107 | 108 | FIRST_VALUE( 109 | date_day 110 | ) OVER (PARTITION BY year_actual ORDER BY date_day) AS first_day_of_year, 111 | 112 | FIRST_VALUE( 113 | date_day 114 | ) OVER (PARTITION BY year_actual, quarter_actual ORDER BY date_day) AS first_day_of_quarter, 115 | 116 | FIRST_VALUE(date_day) OVER (PARTITION BY fiscal_year, fiscal_quarter 117 | ORDER BY date_day ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 118 | ) AS first_day_of_fiscal_quarter, 119 | LAST_VALUE(date_day) OVER (PARTITION BY fiscal_year, fiscal_quarter 120 | ORDER BY date_day ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 121 | ) AS last_day_of_fiscal_quarter, 122 | 123 | FIRST_VALUE(date_day) OVER (PARTITION BY fiscal_year 124 | ORDER BY date_day ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 125 | ) AS first_day_of_fiscal_year, 126 | LAST_VALUE(date_day) OVER (PARTITION BY fiscal_year 127 | ORDER BY date_day ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_day_of_fiscal_year 128 | FROM dates_with_fiscal_calcs 129 | ), 130 | 131 | final AS( 132 | SELECT 133 | *, 134 | {{ generate_surrogate_key(['date_actual']) }} AS date_key, 135 | DATE_DIFF( 136 | date_actual, first_day_of_fiscal_year, WEEK 137 | ) + 1 AS week_of_fiscal_year, 138 | DATE_DIFF( 139 | date_actual, first_day_of_fiscal_quarter, WEEK 140 | ) + 1 AS week_of_fiscal_quarter, 141 | 142 | LAST_VALUE(date_day) OVER (PARTITION BY first_day_of_week 143 | ORDER BY date_day ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_day_of_week, 144 | 145 | (week_of_year - EXTRACT(WEEK FROM first_day_of_month)) AS week_of_month, 146 | (year_actual || '-Q' || EXTRACT(QUARTER FROM date_day)) AS quarter_name, 147 | 148 | ( 149 | 'FY' || SUBSTR(fiscal_quarter_name, 3, 7) 150 | ) AS fiscal_quarter_name_fy, 151 | DENSE_RANK() OVER ( 152 | ORDER BY fiscal_quarter_name 153 | ) AS fiscal_quarter_number_absolute, 154 | 155 | ( 156 | 'FY' || SUBSTR(fiscal_month_name, 3, 8) 157 | ) AS fiscal_month_name_fy, 158 | 159 | SAFE_CAST(CASE WHEN HOLIDAY_DESC IS NULL THEN 0 160 | ELSE 1 END AS BOOLEAN) AS is_holiday, 161 | DATE_TRUNC( 162 | last_day_of_fiscal_quarter, MONTH 163 | ) AS last_month_of_fiscal_quarter, 164 | IF( 165 | DATE_TRUNC(last_day_of_fiscal_quarter, MONTH) = date_actual, TRUE, FALSE 166 | ) AS is_first_day_of_last_month_of_fiscal_quarter, 167 | DATE_TRUNC( 168 | last_day_of_fiscal_year, MONTH 169 | ) AS last_month_of_fiscal_year, 170 | IF( 171 | DATE_TRUNC(last_day_of_fiscal_year, MONTH) = date_actual, TRUE, FALSE 172 | ) AS is_first_day_of_last_month_of_fiscal_year, 173 | COUNT( 174 | date_actual 175 | ) OVER (PARTITION BY first_day_of_month) AS days_in_month_count, 176 | FROM dates_with_aggregates 177 | 178 | ) 179 | 180 | SELECT * 181 | FROM final 182 | -------------------------------------------------------------------------------- /models/utils/utils.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_leaner_query_date 5 | description: Date dimension table that contains all (most) necessary calendar and fiscal date attributes. 6 | columns: 7 | - name: date_key 8 | description: surrogate key, unique per day entry in table 9 | tests: 10 | - not_null 11 | - unique 12 | - name: date_day 13 | description: Calendar date, e.g. '2019-02-02' 14 | tests: 15 | - not_null 16 | - unique 17 | - name: date_actual 18 | description: Matches `date_day`, duplicated for ease of use 19 | tests: 20 | - not_null 21 | - unique 22 | - name: day_name 23 | description: Abbreviated name of the day of the week, e.g. 'Sat' for 2019-02-02 24 | tests: 25 | - not_null 26 | - name: month_name 27 | description: Full name of the month, e.g. 'January' 28 | tests: 29 | - not_null 30 | - name: abbreviated_month_name 31 | description: Abbreviated name of the month, e.g. 'Jan' for 2019-01-10 32 | tests: 33 | - not_null 34 | - name: formatted_month_year_name 35 | description: Formatted abbreviation of month and year, e.g. Jan'19 for 2019-01-10 36 | tests: 37 | - not_null 38 | - name: month_actual 39 | description: Number for the calendar month of the year, e.g. '2' for 2019-02-02 40 | tests: 41 | - not_null 42 | - name: year_actual 43 | description: Calendar year, e.g. '2019' for 2019-02-02 44 | tests: 45 | - not_null 46 | - name: quarter_actual 47 | description: Calendar quarter, e.g. '1' for 2019-02-02 48 | tests: 49 | - not_null 50 | - name: day_of_week 51 | description: Number of the day of the week, with Sunday = 1 and Saturday = 7 52 | tests: 53 | - not_null 54 | - name: first_day_of_week 55 | description: Calendar date of the Sunday of that week, e.g. '2019-01-27' for 2019-02-02 56 | tests: 57 | - not_null 58 | - name: last_day_of_week 59 | description: The Saturday of the week, e.g. '2019-02-02' for 2019-02-02 60 | tests: 61 | - not_null 62 | - name: is_first_day_of_week 63 | description: Is the day the first day (Sunday) of the week it falls in 64 | tests: 65 | - not_null 66 | - name: is_last_day_of_month 67 | description: Is the given day the last day of the month 68 | tests: 69 | - not_null 70 | - name: is_weekday 71 | description: Is the day a weekday (Mon-Fri) 72 | tests: 73 | - not_null 74 | - name: week_of_year 75 | description: Calendar week of year, e.g. '5' for 2019-02-02 76 | tests: 77 | - not_null 78 | - name: week_of_month 79 | description: The week within the month, starting with 1, that the day falls within 80 | tests: 81 | - not_null 82 | - name: day_of_month 83 | description: Day Number of the month, e.g. '2' for 2019-02-02 84 | tests: 85 | - not_null 86 | - name: day_of_quarter 87 | description: Day Number from the start of the calendar quarter, e.g. '33' for 2019-02-02 88 | tests: 89 | - not_null 90 | - name: day_of_year 91 | description: Day Number from the start of the calendar year, e.g. '33' for 2019-02-02 92 | tests: 93 | - not_null 94 | - name: fiscal_year 95 | description: Fiscal year for the date, e.g. '2020' for 2019-02-02 96 | tests: 97 | - not_null 98 | - name: fiscal_quarter 99 | description: Fiscal quarter for the date, e.g. '1' for 2019-02-02 100 | tests: 101 | - not_null 102 | - name: day_of_fiscal_quarter 103 | description: Day Number from the start of the fiscal quarter, e.g. '2' for 2019-02-02 104 | tests: 105 | - not_null 106 | - name: day_of_fiscal_year 107 | description: Day Number from the start of the fiscal year, e.g. '2' for 2019-02-02 108 | tests: 109 | - not_null 110 | - name: days_in_month_count 111 | description: Number of days in the month 112 | - name: first_day_of_month 113 | description: The first day of a calendar month, e.g. '2019-02-01' for 2019-02-02 114 | tests: 115 | - not_null 116 | - name: last_day_of_month 117 | description: The last day of a calendar month, e.g. '2019-02-28' for 2019-02-02 118 | tests: 119 | - not_null 120 | - name: first_day_of_year 121 | description: The first day of a calendar year, e.g. '2019-01-01' for 2019-02-02 122 | tests: 123 | - not_null 124 | - name: last_day_of_year 125 | description: The last day of a calendar year, e.g. '2019-12-31' for 2019-02-02 126 | tests: 127 | - not_null 128 | - name: first_day_of_quarter 129 | description: The first day of a calendar quarter, e.g. '2019-01-01' for 2019-02-02 130 | tests: 131 | - not_null 132 | - name: last_day_of_quarter 133 | description: The last day of a calendar quarter, e.g. '2019-03-31' for 2019-02-02 134 | tests: 135 | - not_null 136 | - name: first_day_of_fiscal_quarter 137 | description: The first day of the fiscal quarter, e.g. '2019-02-01' for 2019-02-02 138 | tests: 139 | - not_null 140 | - name: last_day_of_fiscal_quarter 141 | description: The last day of the fiscal quarter, e.g. '2019-04-30' for 2019-02-02 142 | tests: 143 | - not_null 144 | - name: first_day_of_fiscal_year 145 | description: The first day of the fiscal year, e.g. '2019-02-01' for 2019-02-02 146 | tests: 147 | - not_null 148 | - name: last_day_of_fiscal_year 149 | description: The last day of the fiscal year, e.g. '2020-01-31' for 2019-02-02 150 | tests: 151 | - not_null 152 | - name: week_of_fiscal_year 153 | description: The week number for the fiscal year, e.g. '1' for 2019-02-02 154 | tests: 155 | - not_null 156 | - name: month_of_fiscal_year 157 | description: The month number for the fiscal year, e.g. '1' for 2019-02-02 158 | tests: 159 | - not_null 160 | - name: week_of_fiscal_quarter 161 | description: The quarter number for the fiscal year, e.g. '1' for 2019-03-02 162 | tests: 163 | - not_null 164 | - name: quarter_name 165 | description: The name of the calendar quarter, e.g. '2019-Q1' for 2019-02-02 166 | tests: 167 | - not_null 168 | - name: fiscal_quarter_name 169 | description: The name of the fiscal quarter, e.g '2020-Q1' for 2019-02-02 170 | tests: 171 | - not_null 172 | - name: fiscal_quarter_name_fy 173 | description: The name of the fiscal quarter, e.g 'FY20-Q1' for 2019-02-02 174 | tests: 175 | - not_null 176 | - name: fiscal_quarter_number_absolute 177 | description: Monotonically increasing integer for each fiscal quarter. This allows for comparing the relative order of fiscal quarters. 178 | tests: 179 | - not_null 180 | - name: fiscal_month_name 181 | description: The name of the fiscal month, e.g '2020-Feb' for 2019-02-02 182 | tests: 183 | - not_null 184 | - name: fiscal_month_name_fy 185 | description: The name of the fiscal month, e.g 'FY20-Feb' for 2019-02-02 186 | tests: 187 | - not_null 188 | - name: holiday_desc 189 | description: The name of the holiday, if applicable 190 | - name: is_holiday 191 | description: Whether or not it is a holiday 192 | tests: 193 | - not_null 194 | - name: last_month_of_fiscal_quarter 195 | description: Date indicating last month of fiscal quarter e.g '2020-04-01' for 2020-02-02 196 | tests: 197 | - not_null 198 | - name: is_first_day_of_last_month_of_fiscal_quarter 199 | description: Flag indicating date that is the first day of last month of fiscal quarter. E.g TRUE for '2020-04-01' 200 | - name: last_month_of_fiscal_year 201 | description: Date indicating last month of fiscal year e.g '2021-01-01' for 2020-02-02 202 | tests: 203 | - not_null 204 | - name: is_first_day_of_last_month_of_fiscal_year 205 | description: Flag indicating date that is the first day of last month of fiscal year. E.g TRUE for '2021-01-01' 206 | - name: snapshot_join_timestamp 207 | description: convenience column that is useful in joining to snapshot tables dbt_valid_to and dbt_valid_from columns 208 | - name: start_of_day_timestamp 209 | description: timestamp representing the start of the given day 210 | - name: end_of_day_timestamp 211 | description: timestamp representing the end of the given day 212 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Eventually it would be preferrable to have a specific version to ensure 2 | # that we're all in sync and are preventing dependency pain 3 | 4 | dbt-bigquery==1.3.0 5 | sqlfluff==1.4.2 6 | sqlfluff-templater-dbt==1.4.2 7 | diff-cover==7.2.0 8 | --------------------------------------------------------------------------------