├── .circleci ├── config.yml └── post-diff.js ├── .dockerignore ├── .flake8 ├── .github └── dependabot.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .yamllint.yaml ├── CODEOWNERS ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── architecture ├── lookml.jpg ├── namespaces.jpg └── namespaces_yaml.md ├── bin ├── dev_branches ├── generate └── generator ├── custom-namespaces.yaml ├── docker-compose.yml ├── generator ├── __init__.py ├── __main__.py ├── dashboards │ ├── __init__.py │ ├── dashboard.py │ ├── operational_monitoring_dashboard.py │ └── templates │ │ └── dashboard.lkml ├── dryrun.py ├── explores │ ├── __init__.py │ ├── client_counts_explore.py │ ├── events_explore.py │ ├── explore.py │ ├── funnel_analysis_explore.py │ ├── glean_ping_explore.py │ ├── growth_accounting_explore.py │ ├── metric_definitions_explore.py │ ├── operational_monitoring_explore.py │ ├── ping_explore.py │ └── table_explore.py ├── lkml_update.py ├── lookml.py ├── metrics_utils.py ├── namespaces.py ├── operational_monitoring_utils.py ├── spoke.py ├── utils.py └── views │ ├── __init__.py │ ├── client_counts_view.py │ ├── datagroups.py │ ├── events_view.py │ ├── funnel_analysis_view.py │ ├── glean_ping_view.py │ ├── growth_accounting_view.py │ ├── lookml_utils.py │ ├── metric_definitions_view.py │ ├── operational_monitoring_alerting_view.py │ ├── operational_monitoring_view.py │ ├── ping_view.py │ ├── table_view.py │ └── view.py ├── namespaces-disallowlist.yaml ├── pytest.ini ├── requirements.in ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── data └── metric-hub │ └── definitions │ └── fenix.toml ├── test_datagroups.py ├── test_events.py ├── test_funnel_analysis.py ├── test_glean_ping_view.py ├── test_integration.py ├── test_lookml.py ├── test_lookml_utils.py ├── test_namespaces.py ├── test_operational_monitoring.py ├── test_spoke.py └── utils.py /.circleci/post-diff.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | // A script for posting the generated LookML diff to Github from CircleCI. 3 | // This requires GH_AUTH_TOKEN to be set up, along-side CircleCI specific 4 | // variables. See the source at [1] for more details. 5 | // https://github.com/themadcreator/circle-github-bot/blob/master/src/index.ts 6 | 7 | const fs = require("fs"); 8 | const bot = require("circle-github-bot").create(); 9 | const { graphql } = require("@octokit/graphql"); 10 | const path = require("path"); 11 | 12 | const diff_file = "lookml.diff"; 13 | const graphql_authorized = graphql.defaults({ 14 | headers: { 15 | authorization: `token ${process.env.GH_AUTH_TOKEN}`, 16 | }, 17 | }); 18 | // Github comments can have a maximum length of 65536 characters 19 | const max_content_length = 65000; 20 | 21 | async function minimize_pr_diff_comments() { 22 | if (!process.env.CIRCLE_PULL_REQUEST) { 23 | return; 24 | } 25 | const { viewer } = await graphql_authorized( 26 | `query { 27 | viewer { 28 | login 29 | } 30 | }` 31 | ); 32 | const { repository } = await graphql_authorized( 33 | `query($repo_owner:String!, $repo_name:String!, $pr_number:Int!) 
{ 34 | repository(owner: $repo_owner, name: $repo_name) { 35 | pullRequest(number: $pr_number) { 36 | comments(last: 100) { 37 | nodes { 38 | id 39 | author { 40 | login 41 | } 42 | bodyText 43 | isMinimized 44 | } 45 | } 46 | } 47 | } 48 | }`, 49 | { 50 | repo_owner: process.env.CIRCLE_PROJECT_USERNAME, 51 | repo_name: process.env.CIRCLE_PROJECT_REPONAME, 52 | pr_number: parseInt(path.basename(process.env.CIRCLE_PULL_REQUEST)), 53 | } 54 | ); 55 | for (const comment of repository.pullRequest.comments.nodes) { 56 | if ( 57 | comment.author.login === viewer.login 58 | && comment.bodyText.includes(diff_file) 59 | && !comment.isMinimized 60 | ) { 61 | console.log(`Minimizing comment ${comment.id}.`); 62 | await graphql_authorized( 63 | `mutation($comment_id:ID!) { 64 | minimizeComment(input: {subjectId: $comment_id, classifier: OUTDATED}) { 65 | clientMutationId 66 | } 67 | }`, 68 | { 69 | comment_id: comment.id, 70 | } 71 | ); 72 | } 73 | } 74 | } 75 | 76 | function diff() { 77 | let root = "/tmp/workspace/"; 78 | let diff_content = fs.readFileSync(root + "/" + diff_file, "utf8"); 79 | 80 | var body = "No content detected."; 81 | var warnings = ""; 82 | 83 | if (diff_content) { 84 | if (diff_content.length > max_content_length) { 85 | diff_content = diff_content.substring(0, max_content_length); 86 | warnings = "⚠️ Only part of the diff is displayed." 87 | } 88 | body = `
<details>
 89 | <summary>Click to expand!</summary>
 90 | 
 91 | \`\`\`diff
 92 | ${diff_content}
 93 | \`\`\`
 94 | 
 95 | </details>
96 | 97 | ${warnings} 98 | 99 | [Link to full diff](https://output.circle-artifacts.com/output/job/${process.env.CIRCLE_WORKFLOW_JOB_ID}/artifacts/${process.env.CIRCLE_NODE_INDEX}/${diff_file}) 100 | ` 101 | } 102 | var content = `#### \`${diff_file}\` 103 | ${body} 104 | `; 105 | return content; 106 | } 107 | 108 | function post_diff() { 109 | bot.comment( 110 | process.env.GH_AUTH_TOKEN, 111 | `### Integration report for "${bot.env.commitMessage}" 112 | ${diff()} 113 | ` 114 | ); 115 | } 116 | 117 | minimize_pr_diff_comments().then(post_diff); 118 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ### A list of files and file patterns to be excluded from the Dockerfile 2 | 3 | # 3rd-party configuration 4 | .circleci 5 | .github 6 | .vscode 7 | .gitignore 8 | 9 | # Config files 10 | .pre-commit-config.yaml 11 | 12 | # Docker artifacts 13 | Dockerfile 14 | docker-compose.yml 15 | 16 | # Python artifacts 17 | requirements.in 18 | *.pyc 19 | 20 | # Other 21 | Makefile 22 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203, W503 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: pip 5 | directory: / 6 | schedule: 7 | interval: daily 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.swo 4 | *.egg-info/ 5 | .DS_Store 6 | .vscode 7 | .mypy_cache/ 8 | .probe_cache 9 | venv/ 10 | namespaces.yaml 11 | looker-hub/ 12 | .env 13 | .vscode 14 | .python-version 15 | 16 | build/* 17 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile = black 3 | skip = venv 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: no-commit-to-branch 7 | args: [--branch, main] 8 | - repo: https://github.com/psf/black 9 | rev: 24.3.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/PyCQA/flake8 13 | rev: 7.1.1 14 | hooks: 15 | - id: flake8 16 | - repo: https://github.com/PyCQA/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | - repo: https://github.com/PyCQA/pydocstyle 21 | rev: 6.3.0 22 | hooks: 23 | - id: pydocstyle 24 | exclude: (.*/)?test_.*\.py 25 | - repo: https://github.com/pre-commit/mirrors-mypy 26 | rev: v1.5.1 27 | hooks: 28 | - id: mypy 29 | additional_dependencies: 30 | - types-PyYAML 31 | - repo: https://github.com/adrienverge/yamllint 32 | rev: v1.37.0 33 | hooks: 34 | - id: yamllint 35 | args: [-c, .yamllint.yaml, .] 
36 | 
--------------------------------------------------------------------------------
/.yamllint.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | rules:
 3 |   line-length:
 4 |     allow-non-breakable-words: true
 5 |     allow-non-breakable-inline-mappings: true
 6 |   indentation:
 7 |     spaces: consistent
 8 |     indent-sequences: true
 9 |     check-multi-line-strings: false
10 | 
11 | ignore: |
12 |   .git/
13 |   venv/
14 |   .circleci/
15 |   namespaces.yaml
16 |   looker-hub/namespaces.yaml
17 | 
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Dependency updates (via dependabot)
2 | requirements.in @mozilla/data-looker
3 | requirements.txt @mozilla/data-looker
4 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | LABEL maintainer="kignasiak@mozilla.com"
 4 | 
 5 | ENV USER_ID="10001"
 6 | ENV GROUP_ID="app"
 7 | ENV HOME="/app"
 8 | 
 9 | RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \
10 |     useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /app ${GROUP_ID}
11 | 
12 | # For grpc https://github.com/grpc/grpc/issues/24556#issuecomment-751797589
13 | RUN apt-get update -qqy && \
14 |     apt-get install -qqy build-essential git curl software-properties-common
15 | RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
16 |     && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
17 |     && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null
18 | RUN apt update
19 | RUN apt install -y gh
20 | 
21 | COPY --from=google/cloud-sdk:339.0.0-alpine /google-cloud-sdk /google-cloud-sdk
22 | ENV PATH /google-cloud-sdk/bin:$PATH
23 | 
24 | WORKDIR ${HOME}
25 | 
26 | COPY requirements.txt .
27 | 
28 | RUN pip install --upgrade pip \
29 |     && pip install --no-deps --no-cache-dir -r requirements.txt \
30 |     && rm requirements.txt
31 | 
32 | COPY . ./lookml-generator
33 | RUN pip install --no-dependencies --no-cache-dir -e ./lookml-generator
34 | ENV PATH $PATH:${HOME}/lookml-generator/bin
35 | 
36 | RUN chown -R ${USER_ID}:${GROUP_ID} ${HOME}
37 | USER ${USER_ID}
38 | 
39 | ENTRYPOINT ["generate"]
40 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: help build run shell
 2 | 
 3 | help:
 4 | 	@echo " build Builds the docker images for the docker-compose setup."
 5 | 	@echo " run Runs a command."
 6 | 	@echo " shell Opens a bash shell."
 7 | 
 8 | build:
 9 | 	docker-compose build
10 | 
11 | run:
12 | 	docker-compose run app $(COMMAND)
13 | 
14 | shell:
15 | 	docker-compose run --entrypoint /bin/bash app
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # lookml-generator
2 | [![mozilla](https://circleci.com/gh/mozilla/lookml-generator.svg?style=svg)](https://circleci.com/gh/mozilla/lookml-generator/?branch=main)
3 | 
4 | LookML Generator for Glean and Mozilla Data.
5 | 
6 | The lookml-generator has two important roles:
7 | 1. Generate a listing of all Glean/Mozilla namespaces and their associated BigQuery tables
8 | 2. From that listing, generate LookML for views, explores, and dashboards and push those to the [Looker Hub project](https://github.com/mozilla/looker-hub)
9 | 
10 | ## Generating Namespace Listings
11 | 
12 | At Mozilla, a namespace is a single functional area that is represented in Looker with (usually) one model*.
13 | Each Glean application is self-contained within a single namespace, containing the data from [across that application's channels](https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings).
14 | We also support custom namespaces, which can use wildcards to denote their BigQuery datasets and tables. These are described in `custom-namespaces.yaml`.
15 | 
16 | ![alt text](https://github.com/mozilla/lookml-generator/blob/main/architecture/namespaces.jpg?raw=true)
17 | 
18 | > \* Though namespaces are not limited to a single model, we advise it for clarity's sake.
19 | 
20 | ## Adding Custom Namespaces
21 | Custom namespaces need to be defined explicitly in `custom-namespaces.yaml`. For each namespace, the views and explores to be generated need to be specified.
22 | 
23 | Make sure the custom namespace is _not_ listed in `namespaces-disallowlist.yaml`.
24 | 
25 | Once changes have been approved and merged, the [lookml-generator changes can get deployed](#deploying-new-lookml-generator-changes).
26 | 
27 | ## Generating LookML
28 | Once we know which tables are associated with which namespaces, we can generate LookML files and update our Looker instance.
29 | 
30 | Lookml-generator generates LookML based on both the BigQuery schema and manual changes. For example, we would want to add `city` drill-downs for all `country` fields.
31 | ![alt text](https://github.com/mozilla/lookml-generator/blob/main/architecture/lookml.jpg?raw=true)
32 | 
33 | 
34 | ### Pushing Changes to Dev Branches
35 | In addition to pushing new LookML to the [main branch](https://github.com/mozilla/looker-hub), we reset the dev branches to also
36 | point to the commit at `main`. This only happens during production deployment runs.
37 | 
38 | To automate this process for your dev branch, add it to [this file](https://github.com/mozilla/lookml-generator/tree/main/bin/dev_branches).
39 | You can edit that file in your browser. Open a PR and tag [data-looker](https://github.com/orgs/mozilla/teams/data-looker) for review.
40 | You can find your dev branch by going to [Looker](https://mozilla.cloud.looker.com), entering development mode, opening the [`looker-hub`](https://mozilla.cloud.looker.com/projects/looker-hub)
41 | project, clicking the "Git Actions" icon, and finding your personal branch in the "Current Branch" dropdown.
42 | 
43 | ## Setup
44 | 
45 | Ensure Python 3.10+ is available on your machine (see [this guide](https://docs.python-guide.org/starting/install3/osx/) for instructions if you're on a Mac and haven't installed anything other than the default system Python).
46 | 
47 | You will also need the Google Cloud SDK with valid credentials.
48 | After setting up the Google Cloud SDK, run:
49 | 
50 | ```bash
51 | gcloud config set project moz-fx-data-shared-prod
52 | gcloud auth login --update-adc
53 | ```
54 | 
55 | Install requirements in a Python venv:
56 | ```bash
57 | python3.10 -m venv venv/
58 | venv/bin/pip install --no-deps -r requirements.txt
59 | ```
60 | 
61 | Update requirements when they change with `pip-sync`:
62 | ```bash
63 | venv/bin/pip-sync
64 | ```
65 | 
66 | Set up pre-commit hooks:
67 | ```bash
68 | venv/bin/pre-commit install
69 | ```
70 | 
71 | Run unit tests and linters:
72 | ```bash
73 | venv/bin/pytest
74 | ```
75 | 
76 | Run integration tests:
77 | ```bash
78 | venv/bin/pytest -m integration
79 | ```
80 | 
81 | Note that the integration tests require a valid login to BigQuery to succeed.
82 | 
83 | ## Testing generation locally
84 | 
85 | You can test namespace generation by running:
86 | 
87 | ```bash
88 | ./bin/generator namespaces
89 | ```
90 | 
91 | To generate the actual LookML (in `looker-hub`), run:
92 | 
93 | ```bash
94 | ./bin/generator lookml
95 | ```
96 | 
97 | ## Container Development
98 | 
99 | Most code changes will not require changes to the generation script or container.
100 | However, you can test it locally. The following script will test generation, pushing
101 | a new branch to the `looker-hub` repository:
102 | 
103 | ```
104 | export HUB_BRANCH_PUBLISH="yourname-generation-test-1"
105 | export GIT_SSH_KEY_BASE64=$(cat ~/.ssh/id_rsa | base64)
106 | make build && make run
107 | ```
108 | 
109 | ## Deploying new `lookml-generator` changes
110 | 
111 | `lookml-generator` runs daily to update the `looker-hub` and `looker-spoke-default` code. Changes
112 | to the underlying tables should automatically propagate to their respective views and explores.
113 | 
114 | Airflow updates the two repositories [each morning](https://github.com/mozilla/telemetry-airflow/blob/main/dags/probe_scraper.py#L320).
115 | If you need your changes deployed quickly, wait for the container to build after you merge to
116 | `main`, and re-run the task in Airflow (`lookml_generator`, in the `probe_scraper` DAG).
117 | 
118 | ## `generate` Command Explained - High Level Explanation
119 | 
120 | When `make run` is executed, a Docker container is spun up using the latest `lookml-generator` Docker image on your machine and runs the [`generate` script](bin/generate) using configuration defined at the top of the script unless [overridden using environment variables](./docker-compose.yml#L13-L25) (see the [Container Development](#container-development) section above).
121 | 
122 | Next, the process authenticates with GitHub, clones the [`looker-hub` repository](https://github.com/mozilla/looker-hub), and creates the branch defined in the `HUB_BRANCH_PUBLISH` config variable both locally and in the remote. Then it checks out the looker-hub `base` branch and pulls it from the remote.
123 | 
124 | Once the setup is done, the process generates `namespaces.yaml` and uses it to generate LookML code. A git diff is executed to ensure that the files that already exist in the `base` branch are not being modified. If changes are detected, the process exits with an error code. Otherwise, it proceeds to create a commit and push it to the remote dev branch created earlier.
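
For reference, the generation step above boils down to two CLI calls. The sketch below is condensed from the [`generate` script](bin/generate); the URIs and file names shown are that script's defaults, and the script itself additionally handles branch setup and the diff check:

```bash
# Build namespaces.yaml from the Glean app listings plus custom-namespaces.yaml,
# skipping anything listed in namespaces-disallowlist.yaml.
lookml-generator namespaces \
    --custom-namespaces custom-namespaces.yaml \
    --generated-sql-uri "https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz" \
    --app-listings-uri "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings" \
    --disallowlist namespaces-disallowlist.yaml

# Use the resulting namespaces.yaml to generate LookML views, explores,
# and dashboards into the looker-hub checkout.
lookml-generator lookml \
    --namespaces namespaces.yaml \
    --target-dir looker-hub
```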
125 | 
126 | When following the `Container Development` steps, the entire process results in a dev branch in `looker-hub` with brand-new generated LookML code, which can be tested by going to Looker, switching to development mode, and selecting the dev branch just created/updated by this command. This will result in Looker using the brand-new LookML code just generated. Otherwise, changes merged into `main` in this repo will become available on looker-hub `main` when the scheduled Airflow job runs.
127 | 
--------------------------------------------------------------------------------
/architecture/lookml.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/lookml-generator/684c39ac39a605cca4c76738d79795df89bd237a/architecture/lookml.jpg
--------------------------------------------------------------------------------
/architecture/namespaces.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/lookml-generator/684c39ac39a605cca4c76738d79795df89bd237a/architecture/namespaces.jpg
--------------------------------------------------------------------------------
/architecture/namespaces_yaml.md:
--------------------------------------------------------------------------------
 1 | # `namespaces.yaml`
 2 | 
 3 | We use [`namespaces.yaml`](https://github.com/mozilla/looker-hub/blob/main/namespaces.yaml) as the declarative listing of the Looker namespaces generated by this repository.
 4 | 
 5 | Each entry in `namespaces.yaml` represents a namespace, and has the following properties:
 6 | 
 7 | - `owners` (string): The owners are the people who will have control over the associated Namespace folder in Looker. It is up to them to decide which dashboards to "promote" to their shared folder.
 8 | - `pretty_name` (string): The pretty name is used in most places where the namespace's name is seen, e.g. in the explore drop-down and folder name.
 9 | - `glean_app` (bool): Whether or not this namespace represents a Glean Application.
10 | - `connection` (optional string): The database connection to use, as named in Looker. Defaults to `telemetry`.
11 | - `views` (object): The LookML View files that will be generated. More detailed info below.
12 | - `explores` (object): The LookML Explore files that will be generated. More detailed info below.
13 | 
14 | ## `views`
15 | 
16 | Each View entry is actually a LookML view file that will be generated.
17 | Each LookML View file can contain multiple Looker Views; the idea here is that these views are related and used together. By convention, the first view in the file is the base view (i.e. associated join views follow after the explore containing the base dimension and metrics).
18 | 
19 | - `type`: The type of the view, e.g. `glean_ping_view`.
20 | - `tables`: This field is used in a few ways, depending on the associated View type.
21 | 
22 | For `GleanPingView` and `PingView`, `tables` represents all of the associated channels for that view. Each table will have a `channel` and `table` entry. Only a single view will be created in the LookML file.
23 | 
24 | ```yaml
25 | tables:
26 | - channel: release
27 |   table: mozdata.org_mozilla_firefox.metrics
28 | - channel: nightly
29 |   table: mozdata.org_mozilla_fenix.metrics
30 | ```
31 | 
32 | For `ClientCountView` and `GrowthAccountingView`, `tables` will have a single entry, with the name of the table the Looker View is based off of. Only a single Looker View will be created.
33 | 
34 | ```yaml
35 | tables:
36 | - table: mozdata.org_mozilla_firefox.baseline_clients_last_seen
37 | ```
38 | 
39 | For `FunnelAnalysisView`, only the first list entry is used; inside that entry, each value represents a Looker View that is created. The key is the name of the view; the value is the Looker View or BQ View it is derived from.
40 | In the following example, 4 views will be created in the view file: `funnel_analysis`, `event_types`, `event_type_1` and `event_type_2`.
41 | 
42 | ```yaml
43 | tables:
44 | - funnel_analysis: events_daily_table
45 |   event_types: `mozdata.glean_app.event_types`
46 |   event_type_1: event_types
47 |   event_type_2: event_types
48 | ```
49 | 
50 | ## `explores`
51 | 
52 | Each Explore entry is a single file, sometimes containing multiple explores within it (mainly for things like changing suggestions).
53 | 
54 | - `type` - The type of the explore, e.g. `growth_accounting_explore`.
55 | - `views` - The views that this is based on. Generally, the allowed keys here are:
56 |   - `base_view`: The base view is the one we are basing this Explore on, using [`view_name`](https://docs.looker.com/reference/explore-params/view_name).
57 |   - `extended_view*`: Any views we include in the `base_view` are added as these. It could be one (`extended_view`) or multiple (`extended_view_1`).
58 |   - `joined_view*`: Any other view we are going to join to this one. _This is only required if the joined view is not defined in the same view file as `base_view`._
59 | 
60 | It may not necessarily be desirable to list all of the views and explores in `namespaces.yaml` (e.g. suggest explores specific to a view). In these cases, it is useful to adopt the convention where the first view is the primary view for the explore.
--------------------------------------------------------------------------------
/bin/dev_branches:
--------------------------------------------------------------------------------
1 | dev-frank-bertsch-t9kx
2 | 
--------------------------------------------------------------------------------
/bin/generate:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # A script for generating `namespaces.yaml` and the associated LookML.
 4 | # This repository builds namespaces.yaml from Glean applications and
 5 | # `custom-namespaces.yaml`, and then generates files and LookML that
 6 | # match the specification in `namespaces.yaml` and table definitions
 7 | # in BigQuery. The resulting LookML is pushed to our `looker-hub`
 8 | # repository.
 9 | #
10 | # Environment variables:
11 | #   GIT_SSH_KEY_BASE64: A base64-encoded ssh secret key with permissions to push
12 | #                       to looker-hub and looker-spoke-default.
13 | #   HUB_REPO_URL: The URL to the looker-hub repository.
14 | #                 Requires the SSH format, e.g. git@github.com:user/repo.git.
15 | #   HUB_BRANCH_SOURCE: The source branch for generating LookML.
16 | #                      Defaults to 'base'. Files present in the source
17 | #                      branch will remain unchanged by generation.
18 | #   HUB_BRANCH_PUBLISH: The destination branch for publishing LookML.
19 | #                       Defaults to 'main-nonprod'. If the
20 | #                       branch doesn't exist, creates it from source.
21 | #   SPOKE_REPO_URL: The URL to the looker-spoke-default repository.
22 | #                   Requires the SSH format, e.g. git@github.com:user/repo.git.
23 | #   PRIVATE_SPOKE_REPO_URL: The URL to the looker-spoke-private repository.
24 | #                           Requires the SSH format, e.g. git@github.com:user/repo.git.
25 | #   SPOKE_BRANCH_WORKING: The working branch for the spoke project.
26 | # This is the branch that changes will be 27 | # published to, and a PR opened to merge 28 | # them in to SPOKE_BRANCH_PUBLISH. 29 | # Defaults to SPOKE_BRANCH_PUBLISH-working. 30 | # SPOKE_BRANCH_PUBLISH: The publish branch for the spoke project. 31 | # A PR to merge the changes from SPOKE_BRANCH_WORKING 32 | # to here will be opened. This branch also serves as 33 | # the source branch. 34 | # LOOKER_INSTANCE_URI: The URI of our looker instance. Defaults to dev. 35 | # LOOKER_API_CLIENT_ID: Client ID for Looker access. If unset, does 36 | # not run `generator content`. 37 | # LOOKER_API_CLIENT_SECRET: Client Secret for Looker access. If unset, does 38 | # not run `generator content`. 39 | # GITHUB_ACCESS_TOKEN: Access token for Github. Needs read and write 40 | # access to repos. Not required in dev. 41 | # UPDATE_DEV_BRANCHES: Whether or not the dev branches should be updated. 42 | # This should only happen in production. 43 | # UPDATE_SPOKE_BRANCHES: Whether or not pull-requests should be opened against 44 | # branches in spoke projects. 45 | # This should only happen in production. 46 | # 47 | # Example usage: 48 | # export GIT_SSH_KEY_BASE64=$(cat ~/.ssh/id_rsa | base64) 49 | # make build && make run 50 | 51 | HUB_REPO_URL=${HUB_REPO_URL:-"git@github.com:mozilla/looker-hub.git"} 52 | HUB_BRANCH_SOURCE=${HUB_BRANCH_SOURCE:-"base"} 53 | HUB_BRANCH_PUBLISH=${HUB_BRANCH_PUBLISH:-"main-nonprod"} 54 | 55 | SPOKE_REPO_URL=${SPOKE_REPO_URL:-"git@github.com:mozilla/looker-spoke-default.git"} 56 | PRIVATE_SPOKE_REPO_URL=${PRIVATE_SPOKE_REPO_URL:-"git@github.com:mozilla/looker-spoke-private.git"} 57 | SPOKE_BRANCH_PUBLISH=${SPOKE_BRANCH_PUBLISH:-"main-nonprod"} 58 | SPOKE_BRANCH_WORKING=${SPOKE_BRANCH_WORKING:-"${SPOKE_BRANCH_PUBLISH}-working"} 59 | 60 | LOOKER_INSTANCE_URI=${LOOKER_INSTANCE_URI:-"https://mozilladev.cloud.looker.com"} 61 | UPDATE_DEV_BRANCHES=${UPDATE_DEV_BRANCHES:-"false"} 62 | UPDATE_SPOKE_BRANCHES=${UPDATE_SPOKE_BRANCHES:-"false"} 63 | 64 | function setup_git_auth() { 65 | # Configure the container for pushing to github. 66 | 67 | if [[ -z "$GIT_SSH_KEY_BASE64" ]]; then 68 | echo "Missing secret key" 1>&2 69 | exit 1 70 | fi 71 | 72 | git config --global user.name "Generated LookML Creator" 73 | git config --global user.email "dataops+looker@mozilla.com" 74 | 75 | mkdir -p "$HOME/.ssh" 76 | 77 | echo "$GIT_SSH_KEY_BASE64" | base64 --decode > "$HOME"/.ssh/id_ed25519 78 | # Makes the future git-push non-interactive 79 | ssh-keyscan github.com > "$HOME"/.ssh/known_hosts 80 | 81 | chown -R "$(id -u):$(id -g)" "$HOME/.ssh" 82 | chmod 700 "$HOME/.ssh" 83 | chmod 700 "$HOME/.ssh/id_ed25519" 84 | 85 | # add private key to the ssh agent to prompt for password once 86 | eval "$(ssh-agent)" 87 | ssh-add 88 | } 89 | 90 | function setup_github_auth() { 91 | # Configure Github CLI auth. 92 | 93 | if [[ -z "$GITHUB_ACCESS_TOKEN" ]]; then 94 | gh auth login 95 | else 96 | gh auth login --with-token <<< "$GITHUB_ACCESS_TOKEN" 97 | fi 98 | } 99 | 100 | function setup_hub() { 101 | # Checkout looker-hub and changes directory to prepare for 102 | # LookML generation. 103 | pushd . 
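  # NB: a bare `pushd .` saves the current directory on the directory stack
  # without changing it; the matching `popd` at the end of this function
  # returns there. The same pattern is used by the other setup and
  # generate functions in this script.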
104 | 105 | cd /app 106 | [[ -d looker-hub ]] && rm -rf looker-hub 107 | git clone "$HUB_REPO_URL" 108 | cd looker-hub 109 | git fetch --all 110 | # If publish branch doesn't exist, create it from main 111 | git checkout "$HUB_BRANCH_PUBLISH" || (git checkout main && git checkout -b "$HUB_BRANCH_PUBLISH") 112 | git checkout "$HUB_BRANCH_SOURCE" 113 | 114 | popd 115 | } 116 | 117 | function setup_spoke() { 118 | # Checkout looker-spoke-default and changes directory to prepare for 119 | # LookML generation. Create publish branch if non-existent. 120 | pushd . 121 | spoke="$1" 122 | spoke_url="$2" 123 | 124 | cd /app 125 | [[ -d $spoke ]] && rm -rf $spoke 126 | git clone "$spoke_url" 127 | cd $spoke 128 | git fetch --all 129 | git checkout $SPOKE_BRANCH_PUBLISH || (git checkout main && git checkout -b $SPOKE_BRANCH_PUBLISH) 130 | git branch -D $SPOKE_BRANCH_WORKING || true # delete working branch if it exists 131 | git checkout -b $SPOKE_BRANCH_WORKING 132 | 133 | popd 134 | } 135 | 136 | function setup_spokes() { 137 | setup_spoke "looker-spoke-default" $SPOKE_REPO_URL 138 | setup_spoke "looker-spoke-private" $PRIVATE_SPOKE_REPO_URL 139 | } 140 | 141 | function check_files_and_commit() { 142 | # Add the new files and commit. 143 | # Use interactive mode to add untracked files 144 | # This also works when it's untracked directories 145 | echo -e "a\n*\nu\n*\nq\n"|git add -i 146 | git commit -m "Auto-push from LookML generation" \ 147 | || echo "Nothing to commit" 148 | } 149 | 150 | function generate_hub_commit() { 151 | # Generate commit on publish branch with 152 | # generated LookML. 153 | 154 | pushd . 155 | cd /app 156 | 157 | HUB_DIR="looker-hub" 158 | NAMESPACE_DISALLOWLIST="/app/lookml-generator/namespaces-disallowlist.yaml" 159 | CUSTOM_NAMESPACES_FILENAME="/app/lookml-generator/custom-namespaces.yaml" 160 | GENERATED_SQL_URI="https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz" 161 | APP_LISTINGS_URI="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings" 162 | 163 | # Generate namespaces.yaml and LookML 164 | lookml-generator namespaces \ 165 | --custom-namespaces $CUSTOM_NAMESPACES_FILENAME \ 166 | --generated-sql-uri $GENERATED_SQL_URI \ 167 | --app-listings-uri $APP_LISTINGS_URI \ 168 | --disallowlist $NAMESPACE_DISALLOWLIST 169 | lookml-generator lookml \ 170 | --namespaces "namespaces.yaml" \ 171 | --target-dir $HUB_DIR 172 | 173 | cd $HUB_DIR 174 | 175 | check_files_and_commit 176 | 177 | # Checkout main. Match it with source branch. 178 | git checkout "$HUB_BRANCH_PUBLISH" 179 | find . -mindepth 1 -maxdepth 1 -not -name .git -exec rm -rf {} + 180 | git checkout "$HUB_BRANCH_SOURCE" -- * 181 | git commit --all \ 182 | --message "Auto-push from LookML generation" \ 183 | || echo "Nothing to commit" 184 | 185 | popd 186 | } 187 | 188 | function update_dev_branches() { 189 | # Reset all dev branches to main 190 | 191 | pushd . 192 | cd /app/looker-hub 193 | 194 | dev_branches_file="/app/lookml-generator/bin/dev_branches" 195 | while read branch; do 196 | git checkout $branch 197 | git reset --hard main 198 | git push -f origin $branch 199 | done < $dev_branches_file 200 | 201 | popd 202 | } 203 | 204 | function generate_spoke_commits() { 205 | # Generate commit on spoke publish branch 206 | # with generated LookML. 207 | 208 | pushd . 
209 | cd /app 210 | 211 | lookml-generator update-spoke \ 212 | --namespaces "namespaces.yaml" \ 213 | --spoke-dir "/app" 214 | 215 | cd "/app/looker-spoke-default" 216 | check_files_and_commit 217 | 218 | cd "/app/looker-spoke-private" 219 | check_files_and_commit 220 | 221 | popd 222 | } 223 | 224 | function hit_looker_webhooks() { 225 | # These webhooks ensure production is up-to-date. 226 | # See https://help.looker.com/hc/en-us/articles/360001288268-Deploy-Webhook-Pulling-From-Remote-Git-Repository 227 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/looker-hub/deploy" 228 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/spoke-default/deploy" 229 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/spoke-private/deploy" 230 | } 231 | 232 | function push_and_open_spoke_pull_request() { 233 | # Open a PR to merge the changes from working branch 234 | # in to the publish branch. 235 | 236 | git push -f || git push -f --set-upstream origin "$SPOKE_BRANCH_WORKING" 237 | 238 | # https://bugzilla.mozilla.org/show_bug.cgi?id=1774030 239 | # could not request reviewer: 'mozilla/data-looker' not found 240 | # git diff --quiet $SPOKE_BRANCH_PUBLISH...$SPOKE_BRANCH_WORKING || 241 | # gh pr create \ 242 | # --title "Auto-push from LookML Generator" \ 243 | # --reviewer mozilla/data-looker \ 244 | # --base $SPOKE_BRANCH_PUBLISH \ 245 | # --head $SPOKE_BRANCH_WORKING \ 246 | # --body "" 247 | 248 | git diff --quiet $SPOKE_BRANCH_PUBLISH...$SPOKE_BRANCH_WORKING || 249 | gh pr create \ 250 | --title "Auto-push from LookML Generator" \ 251 | --base $SPOKE_BRANCH_PUBLISH \ 252 | --head $SPOKE_BRANCH_WORKING \ 253 | --body "" 254 | } 255 | 256 | function main() { 257 | set -e # stop if any statement returns a non-zero exit code 258 | pushd . 259 | cd /app 260 | 261 | set +x # don't print these commands 262 | setup_git_auth 263 | setup_github_auth 264 | 265 | # Set up hub and commit 266 | set -x # print these commands 267 | setup_hub 268 | generate_hub_commit 269 | 270 | # Publish hub 271 | cd /app/looker-hub 272 | git push || git push --set-upstream origin "$HUB_BRANCH_PUBLISH" 273 | 274 | # Update dev branches 275 | if [ "$UPDATE_DEV_BRANCHES" = "true" ] ; then 276 | update_dev_branches 277 | fi 278 | 279 | # Update branches on spoke-default and spoke-private 280 | if [ "$UPDATE_SPOKE_BRANCHES" = "true" ] ; then 281 | # Set up spokes and commit 282 | setup_spokes 283 | generate_spoke_commits 284 | 285 | # Publish spoke - force push to working branch 286 | cd /app/looker-spoke-default 287 | push_and_open_spoke_pull_request 288 | 289 | cd /app/looker-spoke-private 290 | push_and_open_spoke_pull_request 291 | 292 | # Update Looker content 293 | hit_looker_webhooks 294 | fi 295 | 296 | popd 297 | } 298 | 299 | main "$@" 300 | -------------------------------------------------------------------------------- /bin/generator: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd "$(dirname "$0")/.." 4 | 5 | if [ -e venv ]; then 6 | . venv/bin/activate 7 | fi 8 | 9 | exec python3 -c "from generator import cli; cli(prog_name='$0')" "$@" 10 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | 4 | services: 5 | app: 6 | build: 7 | context: . 
8 | dockerfile: Dockerfile 9 | restart: "no" 10 | command: "true" 11 | volumes: 12 | - ~/.config:/app/.config 13 | environment: 14 | - GIT_SSH_KEY_BASE64 15 | - HUB_REPO_URL 16 | - HUB_BRANCH_SOURCE 17 | - HUB_BRANCH_PUBLISH 18 | - SPOKE_REPO_URL 19 | - SPOKE_BRANCH_PUBLISH 20 | - LOOKER_INSTANCE_URI 21 | - LOOKER_API_CLIENT_ID 22 | - LOOKER_API_CLIENT_SECRET 23 | - GITHUB_ACCESS_TOKEN 24 | - UPDATE_DEV_BRANCHES 25 | - UPDATE_SPOKE_BRANCHES 26 | -------------------------------------------------------------------------------- /generator/__init__.py: -------------------------------------------------------------------------------- 1 | """Generate LookML. 2 | 3 | .. include:: ../README.md 4 | .. include:: ../architecture/namespaces_yaml.md 5 | """ 6 | 7 | __docformat__ = "restructuredtext" 8 | 9 | import sys 10 | import warnings 11 | 12 | import click 13 | from google.auth.exceptions import DefaultCredentialsError 14 | from google.cloud import bigquery 15 | 16 | from .lookml import lookml 17 | from .namespaces import namespaces 18 | from .spoke import update_spoke 19 | 20 | 21 | def is_authenticated(): 22 | """Check if the user is authenticated to GCP.""" 23 | try: 24 | bigquery.Client() 25 | except DefaultCredentialsError: 26 | return False 27 | return True 28 | 29 | 30 | def cli(prog_name=None): 31 | """Generate and run CLI.""" 32 | if not is_authenticated(): 33 | print( 34 | "Authentication to GCP required. Run `gcloud auth login --update-adc` " 35 | "and check that the project is set correctly." 36 | ) 37 | sys.exit(1) 38 | 39 | commands = { 40 | "namespaces": namespaces, 41 | "lookml": lookml, 42 | "update-spoke": update_spoke, 43 | } 44 | 45 | @click.group(commands=commands) 46 | def group(): 47 | """CLI interface for lookml automation.""" 48 | 49 | warnings.filterwarnings( 50 | "ignore", 51 | "Your application has authenticated using end user credentials", 52 | module="google.auth._default", 53 | ) 54 | 55 | group(prog_name=prog_name) 56 | -------------------------------------------------------------------------------- /generator/__main__.py: -------------------------------------------------------------------------------- 1 | """Run lookml_generator cli.""" 2 | 3 | from . 
import cli 4 | 5 | 6 | def main(): 7 | """Run the CLI.""" 8 | cli("generator") 9 | 10 | 11 | if __name__ == "__main__": 12 | main() 13 | -------------------------------------------------------------------------------- /generator/dashboards/__init__.py: -------------------------------------------------------------------------------- 1 | """All possible dashboard types.""" 2 | 3 | from .dashboard import Dashboard # noqa: F401 4 | from .operational_monitoring_dashboard import OperationalMonitoringDashboard 5 | 6 | DASHBOARD_TYPES = { 7 | OperationalMonitoringDashboard.type: OperationalMonitoringDashboard, 8 | } 9 | -------------------------------------------------------------------------------- /generator/dashboards/dashboard.py: -------------------------------------------------------------------------------- 1 | """Generic dashboard type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from typing import Dict, List 7 | 8 | 9 | @dataclass 10 | class Dashboard(object): 11 | """A generic Looker Dashboard.""" 12 | 13 | title: str 14 | name: str 15 | layout: str 16 | namespace: str 17 | tables: List[Dict[str, str]] 18 | type: str = field(init=False) 19 | 20 | def to_dict(self) -> dict: 21 | """Dashboard instance represented as a dict.""" 22 | return { 23 | self.name: { 24 | "title": self.title, 25 | "type": self.type, 26 | "layout": self.layout, 27 | "namespace": self.namespace, 28 | "tables": self.tables, 29 | } 30 | } 31 | 32 | def to_lookml(self): 33 | """Generate Lookml for this dashboard.""" 34 | raise NotImplementedError("Only implemented in subclass.") 35 | -------------------------------------------------------------------------------- /generator/dashboards/operational_monitoring_dashboard.py: -------------------------------------------------------------------------------- 1 | """Class to describe Operational Monitoring Dashboard.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List 6 | 7 | from ..views import lookml_utils 8 | from .dashboard import Dashboard 9 | 10 | 11 | class OperationalMonitoringDashboard(Dashboard): 12 | """An Operational Monitoring dashboard.""" 13 | 14 | type: str = "operational_monitoring_dashboard" 15 | 16 | def __init__( 17 | self, 18 | title: str, 19 | name: str, 20 | layout: str, 21 | namespace: str, 22 | defn: List[Dict[str, Any]], 23 | ): 24 | """Get an instance of a Operational Monitoring Dashboard.""" 25 | self.dimensions = defn[0].get("dimensions", {}) 26 | self.xaxis = defn[0]["xaxis"] 27 | self.compact_visualization = defn[0].get("compact_visualization", False) 28 | self.group_by_dimension = defn[0].get("group_by_dimension", None) 29 | 30 | super().__init__(title, name, layout, namespace, defn) 31 | 32 | @classmethod 33 | def from_dict( 34 | klass, namespace: str, name: str, defn: dict 35 | ) -> OperationalMonitoringDashboard: 36 | """Get a OperationalMonitoringDashboard from a dict representation.""" 37 | title = defn["title"] 38 | return klass(title, name, "newspaper", namespace, defn["tables"]) 39 | 40 | def _map_series_to_colours(self, branches, explore): 41 | colours = [ 42 | "#3FE1B0", 43 | "#0060E0", 44 | "#9059FF", 45 | "#B933E1", 46 | "#FF2A8A", 47 | "#FF505F", 48 | "#FF7139", 49 | "#FFA537", 50 | "#005E5D", 51 | "#073072", 52 | "#7F165B", 53 | "#A7341F", 54 | ] 55 | return {branch: color for branch, color in zip(branches, colours)} 56 | 57 | def to_lookml(self): 58 | """Get this dashboard as LookML.""" 59 | kwargs = { 60 | "name": self.name, 61 | "title": 
self.title, 62 | "layout": self.layout, 63 | "elements": [], 64 | "dimensions": [], 65 | "group_by_dimension": self.group_by_dimension, 66 | "alerts": None, 67 | "compact_visualization": self.compact_visualization, 68 | } 69 | 70 | includes = [] 71 | graph_index = 0 72 | for table_defn in self.tables: 73 | explore = table_defn["explore"] 74 | includes.append( 75 | f"/looker-hub/{self.namespace}/explores/{explore}.explore.lkml" 76 | ) 77 | 78 | if table_defn["table"].endswith("alerts"): 79 | kwargs["alerts"] = { 80 | "explore": explore, 81 | "col": 0, 82 | "date": ( 83 | f"{self.xaxis}_date" if self.xaxis == "build_id" else self.xaxis 84 | ), 85 | } 86 | else: 87 | if len(kwargs["dimensions"]) == 0: 88 | kwargs["dimensions"] = [ 89 | { 90 | "name": name, 91 | "title": lookml_utils.slug_to_title(name), 92 | "default": info["default"], 93 | "options": info["options"], 94 | } 95 | for name, info in self.dimensions.items() 96 | ] 97 | 98 | series_colors = self._map_series_to_colours( 99 | table_defn["branches"], explore 100 | ) 101 | # determine metric groups 102 | metric_groups = {} 103 | for summary in table_defn.get("summaries", []): 104 | for metric_group in summary.get("metric_groups", []): 105 | if metric_group not in metric_groups: 106 | metric_groups[metric_group] = [summary["metric"]] 107 | elif summary["metric"] not in metric_groups[metric_group]: 108 | metric_groups[metric_group].append(summary["metric"]) 109 | 110 | seen_metric_groups = [] 111 | for summary in table_defn.get("summaries", []): 112 | summary_metric_groups = summary.get("metric_groups", []) 113 | if len(summary_metric_groups) == 0: 114 | # append a dummy entry if no metric group defined 115 | summary_metric_groups.append(None) 116 | 117 | for metric_group in summary_metric_groups: 118 | if (metric_group, summary["statistic"]) in seen_metric_groups: 119 | continue 120 | 121 | if self.compact_visualization: 122 | title = "Metric" 123 | else: 124 | if metric_group is None: 125 | title = lookml_utils.slug_to_title(summary["metric"]) 126 | else: 127 | title = lookml_utils.slug_to_title(metric_group) 128 | 129 | if not self.group_by_dimension: 130 | kwargs["elements"].append( 131 | { 132 | "title": title, 133 | "metric": ( 134 | summary["metric"] 135 | if metric_group is None 136 | else ", ".join( 137 | f'"{m}"' 138 | for m in metric_groups[metric_group] 139 | ) 140 | ), 141 | "statistic": summary["statistic"], 142 | "explore": explore, 143 | "series_colors": series_colors, 144 | "xaxis": self.xaxis, 145 | "row": int(graph_index / 2) * 10, 146 | "col": 0 if graph_index % 2 == 0 else 12, 147 | "is_metric_group": metric_group is not None, 148 | } 149 | ) 150 | if metric_group is not None: 151 | seen_metric_groups.append( 152 | (metric_group, summary["statistic"]) 153 | ) 154 | graph_index += 1 155 | 156 | if self.group_by_dimension: 157 | kwargs["elements"].append( 158 | { 159 | "title": f"{title} - By {self.group_by_dimension}", 160 | "metric": ( 161 | summary["metric"] 162 | if metric_group is None 163 | else ", ".join( 164 | f'"{m}"' 165 | for m in metric_groups[metric_group] 166 | ) 167 | ), 168 | "statistic": summary["statistic"], 169 | "explore": explore, 170 | "series_colors": series_colors, 171 | "xaxis": self.xaxis, 172 | "row": int(graph_index / 2) * 10, 173 | "col": 0 if graph_index % 2 == 0 else 12, 174 | "is_metric_group": metric_group is not None, 175 | } 176 | ) 177 | graph_index += 1 178 | 179 | if self.compact_visualization: 180 | # compact visualization only needs a single tile for all probes 181 | break 182 
| 183 | if self.compact_visualization: 184 | # compact visualization only needs a single tile for all probes 185 | break 186 | 187 | if "alerts" in kwargs and kwargs["alerts"] is not None: 188 | kwargs["alerts"]["row"] = int(graph_index / 2) * 10 189 | 190 | dash_lookml = lookml_utils.render_template( 191 | "dashboard.lkml", "dashboards", **kwargs 192 | ) 193 | return dash_lookml 194 | -------------------------------------------------------------------------------- /generator/dashboards/templates/dashboard.lkml: -------------------------------------------------------------------------------- 1 | - dashboard: {{name}} 2 | title: {{title}} 3 | layout: {{layout}} 4 | preferred_viewer: dashboards-next 5 | 6 | elements: 7 | {% for element in elements -%} 8 | - title: {{element.title}} 9 | name: {{element.title}}_{{element.statistic}} 10 | {% if not compact_visualization -%} 11 | note_state: expanded 12 | note_display: above 13 | note_text: {{element.statistic.title()}} 14 | {% endif -%} 15 | explore: {{element.explore}} 16 | {% if element.statistic == "percentile" -%} 17 | type: "ci-line-chart" 18 | {% else -%} 19 | type: looker_line 20 | {% endif -%} 21 | fields: [ 22 | {{element.explore}}.{{element.xaxis}}, 23 | {{element.explore}}.branch, 24 | {% if element.statistic == "percentile" -%} 25 | {{element.explore}}.upper, 26 | {{element.explore}}.lower, 27 | {% endif -%} 28 | {{element.explore}}.point 29 | ] 30 | pivots: [ 31 | {{element.explore}}.branch 32 | {%- if group_by_dimension and element.title.endswith(group_by_dimension) %}, {{element.explore}}.{{group_by_dimension}}{% endif %} 33 | {%- if element.is_metric_group %}, {{element.explore}}.metric{% endif %} 34 | ] 35 | {% if not compact_visualization -%} 36 | filters: 37 | {{element.explore}}.metric: '{{element.metric}}' 38 | {{element.explore}}.statistic: {{element.statistic}} 39 | {% endif -%} 40 | row: {{element.row}} 41 | col: {{element.col}} 42 | width: 12 43 | height: 8 44 | field_x: {{element.explore}}.{{element.xaxis}} 45 | field_y: {{element.explore}}.point 46 | log_scale: false 47 | ci_lower: {{element.explore}}.lower 48 | ci_upper: {{element.explore}}.upper 49 | show_grid: true 50 | listen: 51 | Date: {{element.explore}}.{{element.xaxis}} 52 | {%- if element.statistic == "percentile" %} 53 | Percentile: {{element.explore}}.parameter 54 | {%- endif %} 55 | {%- for dimension in dimensions %} 56 | {{dimension.title}}: {{element.explore}}.{{dimension.name}} 57 | {%- endfor %} 58 | {% if compact_visualization -%} 59 | Metric: {{element.explore}}.metric 60 | Statistic: {{element.explore}}.statistic 61 | {% endif -%} 62 | {%- for branch, color in element.series_colors.items() %} 63 | {{ branch }}: "{{ color }}" 64 | {%- endfor %} 65 | defaults_version: 0 66 | {% endfor -%} 67 | {% if alerts is not none %} 68 | - title: Alerts 69 | name: Alerts 70 | model: operational_monitoring 71 | explore: {{alerts.explore}} 72 | type: looker_grid 73 | fields: [{{alerts.explore}}.{{elements[0].xaxis}}, 74 | {%- for dimension in dimensions %} 75 | {{alerts.explore}}.{{dimension.name}}, 76 | {%- endfor %} 77 | {{alerts.explore}}.metric, {{alerts.explore}}.statistic, {{alerts.explore}}.parameter, 78 | {{alerts.explore}}.message, {{alerts.explore}}.branch, {{alerts.explore}}.errors] 79 | sorts: [{{alerts.explore}}.submission_date 80 | desc] 81 | limit: 500 82 | show_view_names: false 83 | show_row_numbers: true 84 | transpose: false 85 | truncate_text: true 86 | hide_totals: false 87 | hide_row_totals: false 88 | size_to_fit: true 89 | table_theme: 
white 90 | limit_displayed_rows: false 91 | enable_conditional_formatting: false 92 | header_text_alignment: left 93 | header_font_size: 12 94 | rows_font_size: 12 95 | conditional_formatting_include_totals: false 96 | conditional_formatting_include_nulls: false 97 | x_axis_gridlines: false 98 | y_axis_gridlines: true 99 | show_y_axis_labels: true 100 | show_y_axis_ticks: true 101 | y_axis_tick_density: default 102 | y_axis_tick_density_custom: 5 103 | show_x_axis_label: true 104 | show_x_axis_ticks: true 105 | y_axis_scale_mode: linear 106 | x_axis_reversed: false 107 | y_axis_reversed: false 108 | plot_size_by_field: false 109 | trellis: '' 110 | stacking: '' 111 | legend_position: center 112 | point_style: none 113 | show_value_labels: false 114 | label_density: 25 115 | x_axis_scale: auto 116 | y_axis_combined: true 117 | show_null_points: true 118 | interpolation: linear 119 | defaults_version: 1 120 | series_types: {} 121 | listen: 122 | Date: {{alerts.explore}}.{{alerts.date}} 123 | row: {{ alerts.row }} 124 | col: {{ alerts.col }} 125 | width: 24 126 | height: 6 127 | {% endif %} 128 | filters: 129 | - name: Date 130 | title: Date 131 | type: field_filter 132 | allow_multiple_values: true 133 | required: false 134 | ui_config: 135 | type: advanced 136 | display: popover 137 | model: operational_monitoring 138 | explore: {{elements[0].explore}} 139 | listens_to_filters: [] 140 | field: {{elements[0].explore}}.{{elements[0].xaxis}} 141 | 142 | - name: Percentile 143 | title: Percentile 144 | type: field_filter 145 | default_value: '50' 146 | allow_multiple_values: false 147 | required: true 148 | ui_config: 149 | type: advanced 150 | display: popover 151 | model: operational_monitoring 152 | explore: {{ elements[0].explore }} 153 | listens_to_filters: [] 154 | field: {{ elements[0].explore }}.parameter 155 | {% if compact_visualization -%} 156 | - name: Metric 157 | title: Metric 158 | type: field_filter 159 | default_value: '{{ elements[0].metric }}' 160 | allow_multiple_values: false 161 | required: true 162 | ui_config: 163 | type: dropdown_menu 164 | display: popover 165 | model: operational_monitoring 166 | explore: {{ elements[0].explore }} 167 | listens_to_filters: [] 168 | field: {{ elements[0].explore }}.metric 169 | - name: Statistic 170 | title: Statistic 171 | type: field_filter 172 | default_value: '{{ elements[0].statistic }}' 173 | allow_multiple_values: false 174 | required: true 175 | ui_config: 176 | type: dropdown_menu 177 | display: popover 178 | model: operational_monitoring 179 | explore: {{ elements[0].explore }} 180 | listens_to_filters: [Metric] 181 | field: {{ elements[0].explore }}.statistic 182 | {% endif -%} 183 | 184 | {% for dimension in dimensions -%} 185 | {% if dimension.name != group_by_dimension %} 186 | - title: {{dimension.title}} 187 | name: {{dimension.title}} 188 | type: string_filter 189 | default_value: '{{dimension.default}}' 190 | allow_multiple_values: false 191 | required: true 192 | ui_config: 193 | type: dropdown_menu 194 | display: inline 195 | options: 196 | {% for option in dimension.options -%} 197 | - '{{option}}' 198 | {% endfor %} 199 | {% else %} 200 | - title: {{dimension.title}} 201 | name: {{dimension.title}} 202 | type: string_filter 203 | default_value: '{% for option in dimension.options | sort -%}{{option}}{% if not loop.last %},{% endif %}{% endfor %}' 204 | allow_multiple_values: true 205 | required: true 206 | ui_config: 207 | type: advanced 208 | display: inline 209 | options: 210 | {% for option in dimension.options 
| sort -%} 211 | - '{{option}}' 212 | {% endfor %} 213 | {% endif %} 214 | {% endfor -%} 215 | -------------------------------------------------------------------------------- /generator/dryrun.py: -------------------------------------------------------------------------------- 1 | """Dry Run method to get BigQuery metadata.""" 2 | 3 | import json 4 | from enum import Enum 5 | from functools import cached_property 6 | from typing import Optional 7 | from urllib.request import Request, urlopen 8 | 9 | import google.auth 10 | from google.auth.transport.requests import Request as GoogleAuthRequest 11 | from google.cloud import bigquery 12 | from google.oauth2.id_token import fetch_id_token 13 | 14 | DRY_RUN_URL = ( 15 | "https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/bigquery-etl-dryrun" 16 | ) 17 | 18 | 19 | def credentials(auth_req: Optional[GoogleAuthRequest] = None): 20 | """Get GCP credentials.""" 21 | auth_req = auth_req or GoogleAuthRequest() 22 | creds, _ = google.auth.default( 23 | scopes=["https://www.googleapis.com/auth/cloud-platform"] 24 | ) 25 | creds.refresh(auth_req) 26 | return creds 27 | 28 | 29 | def id_token(): 30 | """Get token to authenticate against Cloud Function.""" 31 | auth_req = GoogleAuthRequest() 32 | creds = credentials(auth_req) 33 | 34 | if hasattr(creds, "id_token"): 35 | # Get token from default credentials for the current environment created via Cloud SDK run 36 | id_token = creds.id_token 37 | else: 38 | # If the environment variable GOOGLE_APPLICATION_CREDENTIALS is set to service account JSON file, 39 | # then ID token is acquired using this service account credentials. 40 | id_token = fetch_id_token(auth_req, DRY_RUN_URL) 41 | return id_token 42 | 43 | 44 | class DryRunError(Exception): 45 | """Exception raised on dry run errors.""" 46 | 47 | def __init__(self, message, error, use_cloud_function, table_id): 48 | """Initialize DryRunError.""" 49 | super().__init__(message) 50 | self.error = error 51 | self.use_cloud_function = use_cloud_function 52 | self.table_id = table_id 53 | 54 | def __reduce__(self): 55 | """ 56 | Override to ensure that all parameters are being passed when pickling. 57 | 58 | Pickling happens when passing exception between processes (e.g. 
via multiprocessing) 59 | """ 60 | return ( 61 | self.__class__, 62 | self.args + (self.error, self.use_cloud_function, self.table_id), 63 | ) 64 | 65 | 66 | class Errors(Enum): 67 | """DryRun errors that require special handling.""" 68 | 69 | READ_ONLY = 1 70 | DATE_FILTER_NEEDED = 2 71 | DATE_FILTER_NEEDED_AND_SYNTAX = 3 72 | PERMISSION_DENIED = 4 73 | 74 | 75 | class DryRunContext: 76 | """DryRun builder class.""" 77 | 78 | def __init__( 79 | self, 80 | use_cloud_function=False, 81 | id_token=None, 82 | credentials=None, 83 | dry_run_url=DRY_RUN_URL, 84 | ): 85 | """Initialize dry run instance.""" 86 | self.use_cloud_function = use_cloud_function 87 | self.dry_run_url = dry_run_url 88 | self.id_token = id_token 89 | self.credentials = credentials 90 | 91 | def create( 92 | self, 93 | sql=None, 94 | project="moz-fx-data-shared-prod", 95 | dataset=None, 96 | table=None, 97 | ): 98 | """Initialize a DryRun instance.""" 99 | return DryRun( 100 | use_cloud_function=self.use_cloud_function, 101 | id_token=self.id_token, 102 | credentials=self.credentials, 103 | sql=sql, 104 | project=project, 105 | dataset=dataset, 106 | table=table, 107 | dry_run_url=self.dry_run_url, 108 | ) 109 | 110 | 111 | class DryRun: 112 | """Dry run SQL.""" 113 | 114 | def __init__( 115 | self, 116 | use_cloud_function=False, 117 | id_token=None, 118 | credentials=None, 119 | sql=None, 120 | project="moz-fx-data-shared-prod", 121 | dataset=None, 122 | table=None, 123 | dry_run_url=DRY_RUN_URL, 124 | ): 125 | """Initialize dry run instance.""" 126 | self.sql = sql 127 | self.use_cloud_function = use_cloud_function 128 | self.project = project 129 | self.dataset = dataset 130 | self.table = table 131 | self.dry_run_url = dry_run_url 132 | self.id_token = id_token 133 | self.credentials = credentials 134 | 135 | @cached_property 136 | def client(self): 137 | """Get BigQuery client instance.""" 138 | return bigquery.Client(credentials=self.credentials) 139 | 140 | @cached_property 141 | def dry_run_result(self): 142 | """Return the dry run result.""" 143 | try: 144 | if self.use_cloud_function: 145 | json_data = { 146 | "query": self.sql or "SELECT 1", 147 | "project": self.project, 148 | "dataset": self.dataset or "telemetry", 149 | } 150 | 151 | if self.table: 152 | json_data["table"] = self.table 153 | 154 | r = urlopen( 155 | Request( 156 | self.dry_run_url, 157 | headers={ 158 | "Content-Type": "application/json", 159 | "Authorization": f"Bearer {self.id_token}", 160 | }, 161 | data=json.dumps(json_data).encode("utf8"), 162 | method="POST", 163 | ) 164 | ) 165 | return json.load(r) 166 | else: 167 | query_schema = None 168 | referenced_tables = [] 169 | table_metadata = None 170 | 171 | if self.sql: 172 | job_config = bigquery.QueryJobConfig( 173 | dry_run=True, 174 | use_query_cache=False, 175 | query_parameters=[ 176 | bigquery.ScalarQueryParameter( 177 | "submission_date", "DATE", "2019-01-01" 178 | ) 179 | ], 180 | ) 181 | 182 | if self.project: 183 | job_config.connection_properties = [ 184 | bigquery.ConnectionProperty( 185 | "dataset_project_id", self.project 186 | ) 187 | ] 188 | 189 | job = self.client.query(self.sql, job_config=job_config) 190 | query_schema = ( 191 | job._properties.get("statistics", {}) 192 | .get("query", {}) 193 | .get("schema", {}) 194 | ) 195 | referenced_tables = [ 196 | ref.to_api_repr() for ref in job.referenced_tables 197 | ] 198 | 199 | if ( 200 | self.project is not None 201 | and self.table is not None 202 | and self.dataset is not None 203 | ): 204 | table = 
self.client.get_table( 205 | f"{self.project}.{self.dataset}.{self.table}" 206 | ) 207 | table_metadata = { 208 | "tableType": table.table_type, 209 | "friendlyName": table.friendly_name, 210 | "schema": { 211 | "fields": [field.to_api_repr() for field in table.schema] 212 | }, 213 | } 214 | 215 | return { 216 | "valid": True, 217 | "referencedTables": referenced_tables, 218 | "schema": query_schema, 219 | "tableMetadata": table_metadata, 220 | } 221 | except Exception as e: 222 | print(f"ERROR {e}") 223 | return None 224 | 225 | def get_schema(self): 226 | """Return the query schema by dry running the SQL file.""" 227 | self.validate() 228 | 229 | if ( 230 | self.dry_run_result 231 | and self.dry_run_result["valid"] 232 | and "schema" in self.dry_run_result 233 | ): 234 | return self.dry_run_result["schema"]["fields"] 235 | 236 | return [] 237 | 238 | def get_table_schema(self): 239 | """Return the schema of the provided table.""" 240 | self.validate() 241 | 242 | if ( 243 | self.dry_run_result 244 | and self.dry_run_result["valid"] 245 | and "tableMetadata" in self.dry_run_result 246 | ): 247 | return self.dry_run_result["tableMetadata"]["schema"]["fields"] 248 | 249 | return [] 250 | 251 | def get_table_metadata(self): 252 | """Return table metadata.""" 253 | self.validate() 254 | 255 | if ( 256 | self.dry_run_result 257 | and self.dry_run_result["valid"] 258 | and "tableMetadata" in self.dry_run_result 259 | ): 260 | return self.dry_run_result["tableMetadata"] 261 | 262 | return {} 263 | 264 | def validate(self): 265 | """Dry run the provided SQL file and check if valid.""" 266 | dry_run_error = DryRunError( 267 | "Error when dry running SQL", 268 | self.get_error(), 269 | self.use_cloud_function, 270 | self.table, 271 | ) 272 | 273 | if self.dry_run_result is None: 274 | raise dry_run_error 275 | 276 | if self.dry_run_result["valid"]: 277 | return True 278 | elif self.get_error() == Errors.READ_ONLY: 279 | # We want the dryrun service to only have read permissions, so 280 | # we expect CREATE VIEW and CREATE TABLE to throw specific 281 | # exceptions. 282 | return True 283 | elif self.get_error() == Errors.DATE_FILTER_NEEDED: 284 | # With strip_dml flag, some queries require a partition filter 285 | # (submission_date, submission_timestamp, etc.) 
to run 286 | return True 287 | else: 288 | print("ERROR\n", self.dry_run_result["errors"]) 289 | raise dry_run_error 290 | 291 | def errors(self): 292 | """Dry run the provided SQL file and return errors.""" 293 | if self.dry_run_result is None: 294 | return [] 295 | return self.dry_run_result.get("errors", []) 296 | 297 | def get_error(self) -> Optional[Errors]: 298 | """Get specific errors for edge case handling.""" 299 | errors = self.errors() 300 | if len(errors) != 1: 301 | return None 302 | 303 | error = errors[0] 304 | if error and error.get("code") in [400, 403]: 305 | error_message = error.get("message", "") 306 | if ( 307 | "does not have bigquery.tables.create permission for dataset" 308 | in error_message 309 | or "Permission bigquery.tables.create denied" in error_message 310 | or "Permission bigquery.datasets.update denied" in error_message 311 | ): 312 | return Errors.READ_ONLY 313 | if "without a filter over column(s)" in error_message: 314 | return Errors.DATE_FILTER_NEEDED 315 | if ( 316 | "Syntax error: Expected end of input but got keyword WHERE" 317 | in error_message 318 | ): 319 | return Errors.DATE_FILTER_NEEDED_AND_SYNTAX 320 | if ( 321 | "Permission bigquery.tables.get denied on table" in error_message 322 | or "User does not have permission to query table" in error_message 323 | ): 324 | return Errors.PERMISSION_DENIED 325 | return None 326 | -------------------------------------------------------------------------------- /generator/explores/__init__.py: -------------------------------------------------------------------------------- 1 | """All possible explore types.""" 2 | 3 | from .explore import Explore # noqa: F401 isort:skip 4 | from .client_counts_explore import ClientCountsExplore 5 | from .events_explore import EventsExplore 6 | from .funnel_analysis_explore import FunnelAnalysisExplore 7 | from .glean_ping_explore import GleanPingExplore 8 | from .growth_accounting_explore import GrowthAccountingExplore 9 | from .metric_definitions_explore import MetricDefinitionsExplore 10 | from .operational_monitoring_explore import ( 11 | OperationalMonitoringAlertingExplore, 12 | OperationalMonitoringExplore, 13 | ) 14 | from .ping_explore import PingExplore 15 | from .table_explore import TableExplore 16 | 17 | EXPLORE_TYPES = { 18 | ClientCountsExplore.type: ClientCountsExplore, 19 | EventsExplore.type: EventsExplore, 20 | FunnelAnalysisExplore.type: FunnelAnalysisExplore, 21 | GleanPingExplore.type: GleanPingExplore, 22 | PingExplore.type: PingExplore, 23 | GrowthAccountingExplore.type: GrowthAccountingExplore, 24 | MetricDefinitionsExplore.type: MetricDefinitionsExplore, 25 | OperationalMonitoringExplore.type: OperationalMonitoringExplore, 26 | OperationalMonitoringAlertingExplore.type: OperationalMonitoringAlertingExplore, 27 | TableExplore.type: TableExplore, 28 | } 29 | -------------------------------------------------------------------------------- /generator/explores/client_counts_explore.py: -------------------------------------------------------------------------------- 1 | """Client Counts explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class ClientCountsExplore(Explore): 13 | """A Client Counts Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "client_counts_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | queries = [] 20 | if time_partitioning_group := self.get_view_time_partitioning_group( 21 | self.views["extended_view"] 22 | ): 23 | date_dimension = f"{time_partitioning_group}_date" 24 | queries.append( 25 | { 26 | "description": "Client Counts of weekly cohorts over the past N days.", 27 | "dimensions": ["days_since_first_seen", "first_seen_week"], 28 | "measures": ["client_count"], 29 | "pivots": ["first_seen_week"], 30 | "filters": [ 31 | {date_dimension: "8 weeks"}, 32 | {"first_seen_date": "8 weeks"}, 33 | {"have_completed_period": "yes"}, 34 | ], 35 | "sorts": [{"days_since_first_seen": "asc"}], 36 | "name": "cohort_analysis", 37 | } 38 | ) 39 | if self.has_view_dimension(self.views["extended_view"], "app_build"): 40 | queries.append( 41 | { 42 | "description": "Number of clients per build.", 43 | "dimensions": [date_dimension, "app_build"], 44 | "measures": ["client_count"], 45 | "pivots": ["app_build"], 46 | "sorts": [{date_dimension: "asc"}], 47 | "name": "build_breakdown", 48 | } 49 | ) 50 | 51 | explore_lookml = { 52 | "name": self.name, 53 | "view_name": self.views["base_view"], 54 | "description": "Client counts across dimensions and cohorts.", 55 | "always_filter": { 56 | "filters": self.get_required_filters("extended_view"), 57 | }, 58 | "queries": queries, 59 | "joins": self.get_unnested_fields_joins_lookml(), 60 | } 61 | 62 | if datagroup := self.get_datagroup(): 63 | explore_lookml["persist_with"] = datagroup 64 | 65 | return [explore_lookml] 66 | 67 | @staticmethod 68 | def from_views(views: List[View]) -> Iterator[ClientCountsExplore]: 69 | """ 70 | If possible, generate a Client Counts explore for this namespace. 71 | 72 | Client counts explores are only created for client_counts views. 
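        Illustrative example (mirroring the code below): a namespace whose
        views include one named `client_counts` yields

            ClientCountsExplore(
                "client_counts",
                {
                    "base_view": "client_counts",
                    "extended_view": "baseline_clients_daily_table",
                },
            )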
73 | """ 74 | for view in views: 75 | if view.name == "client_counts": 76 | yield ClientCountsExplore( 77 | view.name, 78 | { 79 | "base_view": "client_counts", 80 | "extended_view": "baseline_clients_daily_table", 81 | }, 82 | ) 83 | 84 | @staticmethod 85 | def from_dict(name: str, defn: dict, views_path: Path) -> ClientCountsExplore: 86 | """Get an instance of this explore from a dictionary definition.""" 87 | return ClientCountsExplore(name, defn["views"], views_path) 88 | -------------------------------------------------------------------------------- /generator/explores/events_explore.py: -------------------------------------------------------------------------------- 1 | """An explore for Events Views.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import EventsView, View 9 | from .explore import Explore 10 | 11 | 12 | class EventsExplore(Explore): 13 | """An Events Explore, from any unnested events table.""" 14 | 15 | type: str = "events_explore" 16 | 17 | @staticmethod 18 | def from_views(views: List[View]) -> Iterator[EventsExplore]: 19 | """Where possible, generate EventsExplores for Views.""" 20 | for view in views: 21 | if isinstance(view, EventsView): 22 | yield EventsExplore( 23 | view.name, 24 | { 25 | "base_view": "events", 26 | "extended_view": view.tables[0]["events_table_view"], 27 | }, 28 | ) 29 | 30 | @staticmethod 31 | def from_dict(name: str, defn: dict, views_path: Path) -> EventsExplore: 32 | """Get an instance of this explore from a dictionary definition.""" 33 | return EventsExplore(name, defn["views"], views_path) 34 | 35 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 36 | name = self.name 37 | if not name.endswith("_counts"): 38 | name = "event_counts" 39 | 40 | lookml: Dict[str, Any] = { 41 | "name": name, 42 | "view_name": self.views["base_view"], 43 | "description": "Event counts over time.", 44 | "joins": self.get_unnested_fields_joins_lookml(), 45 | } 46 | if required_filters := self.get_required_filters("extended_view"): 47 | lookml["always_filter"] = {"filters": required_filters} 48 | if time_partitioning_group := self.get_view_time_partitioning_group( 49 | self.views["extended_view"] 50 | ): 51 | date_dimension = f"{time_partitioning_group}_date" 52 | lookml["queries"] = [ 53 | { 54 | "description": "Event counts from all events over the past two weeks.", 55 | "dimensions": [date_dimension], 56 | "measures": ["event_count"], 57 | "filters": [ 58 | {date_dimension: "14 days"}, 59 | ], 60 | "name": "all_event_counts", 61 | }, 62 | ] 63 | 64 | if datagroup := self.get_datagroup(): 65 | lookml["persist_with"] = datagroup 66 | 67 | return [lookml] 68 | -------------------------------------------------------------------------------- /generator/explores/explore.py: -------------------------------------------------------------------------------- 1 | """Generic explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from pathlib import Path 7 | from typing import Any, Dict, List, Optional, Tuple 8 | 9 | import lkml 10 | 11 | from ..views.lookml_utils import escape_filter_expr, slug_to_title 12 | 13 | 14 | @dataclass 15 | class Explore: 16 | """A generic explore.""" 17 | 18 | name: str 19 | views: Dict[str, str] 20 | views_path: Optional[Path] = None 21 | defn: Optional[Dict[str, str]] = None 22 | type: str = field(init=False) 23 | 24 | def to_dict(self) -> dict: 25 | 
"""Explore instance represented as a dict.""" 26 | return {self.name: {"type": self.type, "views": self.views}} 27 | 28 | def to_lookml( 29 | self, v1_name: Optional[str], hidden: Optional[bool] 30 | ) -> List[Dict[str, Any]]: 31 | """ 32 | Generate LookML for this explore. 33 | 34 | Any generation done in dependent explore's 35 | `_to_lookml` takes precedence over these fields. 36 | """ 37 | base_lookml = {} 38 | if hidden: 39 | base_lookml["hidden"] = "yes" 40 | base_view_name = next( 41 | ( 42 | view_name 43 | for view_type, view_name in self.views.items() 44 | if view_type == "base_view" 45 | ) 46 | ) 47 | for view_type, view in self.views.items(): 48 | # We look at our dependent views to see if they have a 49 | # "submission" field. Dependent views are any that are: 50 | # - base_view 51 | # - extended_view* 52 | # 53 | # We do not want to look at joined views. Those should be 54 | # labeled as: 55 | # - join* 56 | # 57 | # If they have a submission field, we filter on the date. 58 | # This allows for filter queries to succeed. 59 | if "join" in view_type: 60 | continue 61 | if time_partitioning_group := self.get_view_time_partitioning_group(view): 62 | base_lookml["sql_always_where"] = ( 63 | f"${{{base_view_name}.{time_partitioning_group}_date}} >= '2010-01-01'" 64 | ) 65 | 66 | # We only update the first returned explore 67 | new_lookml = self._to_lookml(v1_name) 68 | base_lookml.update(new_lookml[0]) 69 | new_lookml[0] = base_lookml 70 | 71 | return new_lookml 72 | 73 | def _to_lookml( 74 | self, 75 | v1_name: Optional[str], 76 | ) -> List[Dict[str, Any]]: 77 | raise NotImplementedError("Only implemented in subclasses") 78 | 79 | def get_dependent_views(self) -> List[str]: 80 | """Get views this explore is dependent on.""" 81 | dependent_views = [] 82 | for _type, views in self.views.items(): 83 | if _type.startswith("extended"): 84 | continue 85 | elif _type.startswith("joined"): 86 | dependent_views += [view for view in views] 87 | else: 88 | dependent_views.append(views) 89 | return dependent_views 90 | 91 | @staticmethod 92 | def from_dict(name: str, defn: dict, views_path: Path) -> Explore: 93 | """Get an instance of an explore from a namespace definition.""" 94 | raise NotImplementedError("Only implemented in subclasses") 95 | 96 | def get_view_lookml(self, view: str) -> dict: 97 | """Get the LookML for a view.""" 98 | if self.views_path is not None: 99 | return lkml.load((self.views_path / f"{view}.view.lkml").read_text()) 100 | 101 | raise Exception("Missing view path for get_view_lookml") 102 | 103 | def get_datagroup(self) -> Optional[str]: 104 | """ 105 | Return the name of the associated datagroup. 106 | 107 | Return `None` if there is no datagroup for this explore. 
108 | """ 109 | if self.views_path and (self.views_path.parent / "datagroups").exists(): 110 | datagroups_path = self.views_path.parent / "datagroups" 111 | datagroup_file = ( 112 | datagroups_path 113 | / f'{self.views["base_view"]}_last_updated.datagroup.lkml' 114 | ) 115 | if datagroup_file.exists(): 116 | return f'{self.views["base_view"]}_last_updated' 117 | return None 118 | 119 | def get_unnested_fields_joins_lookml( 120 | self, 121 | ) -> list: 122 | """Get the LookML for joining unnested fields.""" 123 | views_lookml = self.get_view_lookml(self.views["base_view"]) 124 | views: List[str] = [view["name"] for view in views_lookml["views"]] 125 | parent_base_name = views_lookml["views"][0]["name"] 126 | 127 | extended_views: List[str] = [] 128 | if "extended_view" in self.views: 129 | # check for extended views 130 | extended_views_lookml = self.get_view_lookml(self.views["extended_view"]) 131 | extended_views = [view["name"] for view in extended_views_lookml["views"]] 132 | 133 | views_lookml.update(extended_views_lookml) 134 | views += extended_views 135 | 136 | joins = [] 137 | for view in views_lookml["views"][1:]: 138 | view_name = view["name"] 139 | # get repeated, nested fields that exist as separate views in lookml 140 | base_name, metric = self._get_base_name_and_metric( 141 | view_name=view_name, views=views 142 | ) 143 | metric_name = view_name 144 | metric_label = slug_to_title(metric_name) 145 | 146 | if view_name in extended_views: 147 | # names of extended views are overridden by the name of the view that is extending them 148 | metric_label = slug_to_title( 149 | metric_name.replace(base_name, parent_base_name) 150 | ) 151 | base_name = parent_base_name 152 | 153 | joins.append( 154 | { 155 | "name": view_name, 156 | "view_label": metric_label, 157 | "relationship": "one_to_many", 158 | "sql": ( 159 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {metric_name} " 160 | ), 161 | } 162 | ) 163 | 164 | return joins 165 | 166 | def _get_default_channel(self, view: str) -> Optional[str]: 167 | channel_params = [ 168 | param 169 | for _view_defn in self.get_view_lookml(view)["views"] 170 | for param in _view_defn.get("filters", []) 171 | if _view_defn["name"] == view and param["name"] == "channel" 172 | ] 173 | 174 | if channel_params: 175 | allowed_values = channel_params[0]["suggestions"] 176 | default_value = allowed_values[0] 177 | return escape_filter_expr(default_value) 178 | return None 179 | 180 | def _get_base_name_and_metric( 181 | self, view_name: str, views: List[str] 182 | ) -> Tuple[str, str]: 183 | """ 184 | Get base view and metric names. 185 | 186 | Returns the name of the base view and the metric based on the 187 | passed `view_name` and existing views. 188 | 189 | The names are resolved in a backwards fashion to account for 190 | repeated nested fields that might contain other nested fields. 191 | For example: 192 | 193 | view: sync { 194 | [...] 195 | dimension: payload__events { 196 | sql: ${TABLE}.payload.events ;; 197 | } 198 | } 199 | 200 | view: sync__payload__events { 201 | [...] 202 | dimension: f5_ { 203 | sql: ${TABLE}.f5_ ;; 204 | } 205 | } 206 | 207 | view: sync__payload__events__f5_ { 208 | [...] 209 | } 210 | 211 | For these nested views to get translated to the following joins, the names 212 | need to be resolved backwards: 213 | 214 | join: sync__payload__events { 215 | relationship: one_to_many 216 | sql: LEFT JOIN UNNEST(${sync.payload__events}) AS sync__payload__events ;; 217 | } 218 | 219 | join: sync__payload__events__f5_ { 220 | relationship: one_to_many 221 | sql: LEFT JOIN UNNEST(${sync__payload__events.f5_}) AS sync__payload__events__f5_ ;; 222 | } 223 | """ 224 | split = view_name.split("__") 225 | for index in range(len(split) - 1, 0, -1): 226 | base_view = "__".join(split[:index]) 227 | metric = "__".join(split[index:]) 228 | if base_view in views: 229 | return (base_view, metric) 230 | raise Exception(f"Cannot get base name and metric from view {view_name}")
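# Annotation (not part of the upstream source): a worked example of the
# backwards resolution implemented above, using the names from the docstring.
#
#   views = ["sync", "sync__payload__events", "sync__payload__events__f5_"]
#   _get_base_name_and_metric("sync__payload__events__f5_", views)
#   # first split tried: base "sync__payload__events", metric "f5_";
#   # the base is in `views`, so ("sync__payload__events", "f5_") is returned
#   _get_base_name_and_metric("sync__payload__events", views)
#   # "sync__payload" is not in `views`, so resolution continues and
#   # ("sync", "payload__events") is returned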
231 | 232 | def has_view_dimension(self, view: str, dimension_name: str) -> bool: 233 | """Determine whether this view has the given dimension.""" 234 | for _view_defn in self.get_view_lookml(view)["views"]: 235 | if _view_defn["name"] != view: 236 | continue 237 | for dim in _view_defn.get("dimensions", []): 238 | if dim["name"] == dimension_name: 239 | return True 240 | return False 241 | 242 | def get_view_time_partitioning_group(self, view: str) -> Optional[str]: 243 | """Get the time partitioning dimension group for this view. 244 | 245 | Return the name of the first dimension group tagged "time_partitioning_field", 246 | and fall back to "submission" if available. 247 | """ 248 | has_submission = False 249 | for _view_defn in self.get_view_lookml(view)["views"]: 250 | if not _view_defn["name"] == view: 251 | continue 252 | for dim in _view_defn.get("dimension_groups", []): 253 | if "time_partitioning_field" in dim.get("tags", []): 254 | return dim["name"] 255 | elif dim["name"] == "submission": 256 | has_submission = True 257 | if has_submission: 258 | return "submission" 259 | return None 260 | 261 | def get_required_filters(self, view_name: str) -> List[Dict[str, str]]: 262 | """Get required filters for this view.""" 263 | filters = [] 264 | view = self.views[view_name] 265 | 266 | # Add a default filter on channel, if it's present in the view 267 | default_channel = self._get_default_channel(view) 268 | if default_channel is not None: 269 | filters.append({"channel": default_channel}) 270 | 271 | # Add submission filter, if present in the view 272 | if time_partitioning_group := self.get_view_time_partitioning_group(view): 273 | filters.append({f"{time_partitioning_group}_date": "28 days"}) 274 | 275 | return filters 276 | 277 | def __eq__(self, other) -> bool: 278 | """Check for equality with another Explore.""" 279 | 280 | def comparable_dict(d): 281 | return tuple(sorted(d.items())) 282 | 283 | if isinstance(other, Explore): 284 | return ( 285 | self.name == other.name 286 | and comparable_dict(self.views) == comparable_dict(other.views) 287 | and self.type == other.type 288 | ) 289 | return False 290 | -------------------------------------------------------------------------------- /generator/explores/funnel_analysis_explore.py: -------------------------------------------------------------------------------- 1 | """Funnel Analysis explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class FunnelAnalysisExplore(Explore): 13 | """A Funnel Analysis Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "funnel_analysis_explore" 16 | n_funnel_steps: int = 4 17 | 18 | @staticmethod 19 | def from_views(views: List[View]) -> Iterator[FunnelAnalysisExplore]: 20 | """ 21 | If possible, generate a Funnel Analysis explore for this namespace. 22 | 23 | Funnel analysis explores are only created for funnel_analysis views. 24 | """ 25 | for view in views: 26 | if view.name == "funnel_analysis": 27 | yield FunnelAnalysisExplore( 28 | "funnel_analysis", 29 | {"base_view": view.name}, 30 | ) 31 | 32 | @staticmethod 33 | def from_dict(name: str, defn: dict, views_path: Path) -> FunnelAnalysisExplore: 34 | """Get an instance of this explore from a dictionary definition.""" 35 | return FunnelAnalysisExplore(name, defn["views"], views_path) 36 | 37 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 38 | view_lookml = self.get_view_lookml("funnel_analysis") 39 | views = view_lookml["views"] 40 | n_events = len([d for d in views if d["name"].startswith("step_")]) 41 | 42 | explore_lookml = { 43 | "name": "funnel_analysis", 44 | "description": "Count funnel completion over time. Funnels are limited to a single day.", 45 | "view_label": " User-Day Funnels", 46 | "always_filter": { 47 | "filters": [ 48 | {"submission_date": "14 days"}, 49 | ] 50 | }, 51 | "joins": [ 52 | { 53 | "name": f"step_{n}", 54 | "relationship": "many_to_one", 55 | "type": "cross", 56 | } 57 | for n in range(1, n_events + 1) 58 | ], 59 | "sql_always_where": "${funnel_analysis.submission_date} >= '2010-01-01'", 60 | } 61 | 62 | if datagroup := self.get_datagroup(): 63 | explore_lookml["persist_with"] = datagroup 64 | 65 | defn: List[Dict[str, Any]] = [ 66 | explore_lookml, 67 | {"name": "event_names", "hidden": "yes"}, 68 | ] 69 | 70 | return defn 71 | -------------------------------------------------------------------------------- /generator/explores/glean_ping_explore.py: -------------------------------------------------------------------------------- 1 | """Glean Ping explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from mozilla_schema_generator.glean_ping import GleanPing 9 | 10 | from ..views import GleanPingView, View 11 | from .ping_explore import PingExplore 12 | 13 | 14 | class GleanPingExplore(PingExplore): 15 | """A Glean Ping Table explore.""" 16 | 17 | type: str = "glean_ping_explore" 18 | 19 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 20 | """Generate LookML to represent this explore.""" 21 | repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name)) 22 | glean_app = GleanPing(repo) 23 | # convert ping description indexes to snake case, as we already have 24 | # for the explore name 25 | ping_descriptions = { 26 | k.replace("-", "_"): v for k, v in glean_app.get_ping_descriptions().items() 27 | } 28 | # collapse whitespace in the description so the lookml looks a little better 29 | ping_description = " ".join(ping_descriptions.get(self.name, "").split()) 30 | views_lookml = self.get_view_lookml(self.views["base_view"]) 31 | 32 | # The first view, by convention, is always the base view with the 33 | # majority of the dimensions from the top level. 
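# (Annotation, names illustrative: for a Glean ping named `metrics`, the base
# view would be `metrics`, while nested views such as
# `metrics__metrics__labeled_counter__glean_error_invalid_value` are joined
# back onto it through the UNNEST joins built below.)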
34 | base = views_lookml["views"][0] 35 | base_name = base["name"] 36 | 37 | joins = [] 38 | for view in views_lookml["views"][1:]: 39 | if view["name"].startswith("suggest__"): 40 | continue 41 | view_name = view["name"] 42 | metric = "__".join(view["name"].split("__")[1:]) 43 | 44 | if "__labeled_counter__" in metric: 45 | joins.append( 46 | { 47 | "name": view_name, 48 | "relationship": "one_to_many", 49 | "sql": ( 50 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {view_name} " 51 | f"ON ${{{base_name}.document_id}} = ${{{view_name}.document_id}}" 52 | ), 53 | } 54 | ) 55 | else: 56 | if metric.startswith("metrics__"): 57 | continue 58 | 59 | try: 60 | # get repeated, nested fields that exist as separate views in lookml 61 | base_name, metric = self._get_base_name_and_metric( 62 | view_name=view_name, 63 | views=[v["name"] for v in views_lookml["views"]], 64 | ) 65 | metric_name = view_name 66 | 67 | joins.append( 68 | { 69 | "name": view_name, 70 | "relationship": "one_to_many", 71 | "sql": ( 72 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {metric_name} " 73 | ), 74 | } 75 | ) 76 | except Exception: 77 | # ignore nested views that cannot be joined on to the base view 78 | continue 79 | 80 | base_explore: Dict[str, Any] = { 81 | "name": self.name, 82 | # list the base explore first by prefixing with a space 83 | "view_label": f" {self.name.title()}", 84 | "description": f"Explore for the {self.name} ping. {ping_description}", 85 | "view_name": self.views["base_view"], 86 | "joins": joins, 87 | } 88 | 89 | if datagroup := self.get_datagroup(): 90 | base_explore["persist_with"] = datagroup 91 | 92 | required_filters = self.get_required_filters("base_view") 93 | if len(required_filters) > 0: 94 | base_explore["always_filter"] = {"filters": required_filters} 95 | 96 | suggests = [] 97 | for view in views_lookml["views"][1:]: 98 | if not view["name"].startswith("suggest__"): 99 | continue 100 | suggests.append({"name": view["name"], "hidden": "yes"}) 101 | 102 | return [base_explore] + suggests 103 | 104 | @staticmethod 105 | def from_views(views: List[View]) -> Iterator[PingExplore]: 106 | """Generate all possible GleanPingExplores from the views.""" 107 | for view in views: 108 | if view.view_type == GleanPingView.type: 109 | yield GleanPingExplore(view.name, {"base_view": view.name}) 110 | 111 | @staticmethod 112 | def from_dict(name: str, defn: dict, views_path: Path) -> GleanPingExplore: 113 | """Get an instance of this explore from a name and dictionary definition.""" 114 | return GleanPingExplore(name, defn["views"], views_path) 115 | -------------------------------------------------------------------------------- /generator/explores/growth_accounting_explore.py: -------------------------------------------------------------------------------- 1 | """Growth Accounting explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class GrowthAccountingExplore(Explore): 13 | """A Growth Accounting Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "growth_accounting_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | explore_lookml = { 20 | "name": self.name, 21 | "view_name": self.views["base_view"], 22 | "joins": self.get_unnested_fields_joins_lookml(), 23 | } 24 | 25 | if datagroup := self.get_datagroup(): 26 | explore_lookml["persist_with"] = datagroup 27 | 28 | return [explore_lookml] 29 | 30 | @staticmethod 31 | def from_views(views: List[View]) -> Iterator[GrowthAccountingExplore]: 32 | """ 33 | If possible, generate a Growth Accounting explore for this namespace. 34 | 35 | Growth accounting explores are only created for growth_accounting views. 36 | """ 37 | for view in views: 38 | if view.name == "growth_accounting": 39 | yield GrowthAccountingExplore( 40 | view.name, 41 | {"base_view": "growth_accounting"}, 42 | ) 43 | 44 | @staticmethod 45 | def from_dict(name: str, defn: dict, views_path: Path) -> GrowthAccountingExplore: 46 | """Get an instance of this explore from a dictionary definition.""" 47 | return GrowthAccountingExplore(name, defn["views"], views_path) 48 | -------------------------------------------------------------------------------- /generator/explores/metric_definitions_explore.py: -------------------------------------------------------------------------------- 1 | """Metric Hub metrics explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . import Explore 10 | 11 | 12 | class MetricDefinitionsExplore(Explore): 13 | """Metric Hub Metrics Explore.""" 14 | 15 | type: str = "metric_definitions_explore" 16 | 17 | def __init__( 18 | self, 19 | name: str, 20 | views: Dict[str, str], 21 | views_path: Optional[Path] = None, 22 | defn: Optional[Dict[str, Any]] = None, 23 | ): 24 | """Initialize MetricDefinitionsExplore.""" 25 | super().__init__(name, views, views_path) 26 | 27 | @staticmethod 28 | def from_views(views: List[View]) -> Iterator[Explore]: 29 | """Generate an Operational Monitoring explore for this namespace.""" 30 | for view in views: 31 | if view.view_type == "metric_definitions_view": 32 | yield MetricDefinitionsExplore("metric_definitions", {}) 33 | 34 | @staticmethod 35 | def from_dict(name: str, defn: dict, views_path: Path) -> MetricDefinitionsExplore: 36 | """Get an instance of this explore from a dictionary definition.""" 37 | return MetricDefinitionsExplore(name, defn["views"], views_path, defn) 38 | 39 | def _to_lookml( 40 | self, 41 | _v1_name: Optional[str], 42 | ) -> List[Dict[str, Any]]: 43 | exposed_fields = ["ALL_FIELDS*"] 44 | 45 | explore_lookml: Dict[str, Any] = { 46 | "name": self.name, 47 | "always_filter": { 48 | "filters": [{"submission_date": "7 days"}, {"sampling": "1"}] 49 | }, 50 | # The base view is the only view that exposes the date and client_id fields. 51 | # All other views only expose the metric definitions. 
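# (Annotation: "ALL_FIELDS*" is LookML's built-in set that expands to every
# field of the explore and its joined views, so the generated explore
# exposes all fields by default.)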
52 | "fields": exposed_fields, 53 | } 54 | 55 | if datagroup := self.get_datagroup(): 56 | explore_lookml["persist_with"] = datagroup 57 | 58 | return [explore_lookml] 59 | 60 | def get_view_time_partitioning_group(self, view: str) -> Optional[str]: 61 | """Override time partitioning.""" 62 | return None 63 | -------------------------------------------------------------------------------- /generator/explores/operational_monitoring_explore.py: -------------------------------------------------------------------------------- 1 | """Operational Monitoring Explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . import Explore 10 | 11 | 12 | class OperationalMonitoringExplore(Explore): 13 | """An Operational Monitoring Explore.""" 14 | 15 | type: str = "operational_monitoring_explore" 16 | 17 | def __init__( 18 | self, 19 | name: str, 20 | views: Dict[str, str], 21 | views_path: Optional[Path] = None, 22 | defn: Optional[Dict[str, Any]] = None, 23 | ): 24 | """Initialize OperationalMonitoringExplore.""" 25 | super().__init__(name, views, views_path) 26 | if defn is not None: 27 | self.branches = ", ".join(defn["branches"]) 28 | self.xaxis = defn.get("xaxis") 29 | self.dimensions = defn.get("dimensions", {}) 30 | self.summaries = defn.get("summaries", []) 31 | 32 | @staticmethod 33 | def from_views(views: List[View]) -> Iterator[Explore]: 34 | """Generate an Operational Monitoring explore for this namespace.""" 35 | for view in views: 36 | if view.view_type == "operational_monitoring_view": 37 | yield OperationalMonitoringExplore( 38 | "operational_monitoring", 39 | {"base_view": view.name}, 40 | ) 41 | 42 | @staticmethod 43 | def from_dict( 44 | name: str, defn: dict, views_path: Path 45 | ) -> OperationalMonitoringExplore: 46 | """Get an instance of this explore from a dictionary definition.""" 47 | return OperationalMonitoringExplore(name, defn["views"], views_path, defn) 48 | 49 | def _to_lookml( 50 | self, 51 | v1_name: Optional[str], 52 | ) -> List[Dict[str, Any]]: 53 | base_view_name = self.views["base_view"] 54 | 55 | filters = [ 56 | {f"{base_view_name}.branch": self.branches}, 57 | ] 58 | for dimension, info in self.dimensions.items(): 59 | if "default" in info: 60 | filters.append({f"{base_view_name}.{dimension}": info["default"]}) 61 | 62 | explore_lookml = { 63 | "name": self.views["base_view"], 64 | "always_filter": { 65 | "filters": [ 66 | {"branch": self.branches}, 67 | ] 68 | }, 69 | "hidden": "yes", 70 | } 71 | 72 | if datagroup := self.get_datagroup(): 73 | explore_lookml["persist_with"] = datagroup 74 | 75 | defn: List[Dict[str, Any]] = [explore_lookml] 76 | 77 | return defn 78 | 79 | 80 | class OperationalMonitoringAlertingExplore(Explore): 81 | """An Operational Monitoring Alerting Explore.""" 82 | 83 | type: str = "operational_monitoring_alerting_explore" 84 | 85 | def __init__( 86 | self, 87 | name: str, 88 | views: Dict[str, str], 89 | views_path: Optional[Path] = None, 90 | defn: Optional[Dict[str, Any]] = None, 91 | ): 92 | """Initialize OperationalMonitoringExplore.""" 93 | super().__init__(name, views, views_path) 94 | 95 | @staticmethod 96 | def from_views(views: List[View]) -> Iterator[Explore]: 97 | """Generate an Operational Monitoring explore for this namespace.""" 98 | for view in views: 99 | if view.view_type in { 100 | "operational_monitoring_alerting_view", 101 | }: 102 | yield OperationalMonitoringAlertingExplore( 103 | 
"operational_monitoring", 104 | {"base_view": view.name}, 105 | ) 106 | 107 | @staticmethod 108 | def from_dict( 109 | name: str, defn: dict, views_path: Path 110 | ) -> OperationalMonitoringAlertingExplore: 111 | """Get an instance of this explore from a dictionary definition.""" 112 | return OperationalMonitoringAlertingExplore( 113 | name, defn["views"], views_path, defn 114 | ) 115 | 116 | def _to_lookml( 117 | self, 118 | v1_name: Optional[str], 119 | ) -> List[Dict[str, Any]]: 120 | explore_lookml = {"name": self.views["base_view"], "hidden": "yes"} 121 | 122 | if datagroup := self.get_datagroup(): 123 | explore_lookml["persist_with"] = datagroup 124 | 125 | defn: List[Dict[str, Any]] = [explore_lookml] 126 | 127 | return defn 128 | -------------------------------------------------------------------------------- /generator/explores/ping_explore.py: -------------------------------------------------------------------------------- 1 | """Ping explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import PingView, View 9 | from . import Explore 10 | 11 | 12 | class PingExplore(Explore): 13 | """A Ping Table explore.""" 14 | 15 | type: str = "ping_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | explore_lookml = { 20 | "name": self.name, 21 | "view_name": self.views["base_view"], 22 | "always_filter": { 23 | "filters": self.get_required_filters("base_view"), 24 | }, 25 | "joins": self.get_unnested_fields_joins_lookml(), 26 | } 27 | 28 | if datagroup := self.get_datagroup(): 29 | explore_lookml["persist_with"] = datagroup 30 | 31 | return [explore_lookml] 32 | 33 | @staticmethod 34 | def from_views(views: List[View]) -> Iterator[PingExplore]: 35 | """Generate all possible PingExplores from the views.""" 36 | for view in views: 37 | if view.view_type == PingView.type: 38 | yield PingExplore(view.name, {"base_view": view.name}) 39 | 40 | @staticmethod 41 | def from_dict(name: str, defn: dict, views_path: Path) -> PingExplore: 42 | """Get an instance of this explore from a name and dictionary definition.""" 43 | return PingExplore(name, defn["views"], views_path) 44 | -------------------------------------------------------------------------------- /generator/explores/table_explore.py: -------------------------------------------------------------------------------- 1 | """Table explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import TableView, View 9 | from . 
import Explore 10 | 11 | ALLOWED_VIEWS = {"events_stream_table"} 12 | 13 | 14 | class TableExplore(Explore): 15 | """A table explore.""" 16 | 17 | type: str = "table_explore" 18 | 19 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 20 | """Generate LookML to represent this explore.""" 21 | explore_lookml: Dict[str, Any] = { 22 | "name": self.name, 23 | "view_name": self.views["base_view"], 24 | "joins": self.get_unnested_fields_joins_lookml(), 25 | } 26 | if required_filters := self.get_required_filters("base_view"): 27 | explore_lookml["always_filter"] = { 28 | "filters": required_filters, 29 | } 30 | 31 | if datagroup := self.get_datagroup(): 32 | explore_lookml["persist_with"] = datagroup 33 | 34 | return [explore_lookml] 35 | 36 | @staticmethod 37 | def from_views(views: List[View]) -> Iterator[TableExplore]: 38 | """Don't generate all possible TableExplores from the views, only generate for ALLOWED_VIEWS.""" 39 | for view in views: 40 | if view.view_type == TableView.type: 41 | if view.name in ALLOWED_VIEWS: 42 | yield TableExplore(view.name, {"base_view": view.name}) 43 | 44 | @staticmethod 45 | def from_dict(name: str, defn: dict, views_path: Path) -> TableExplore: 46 | """Get an instance of this explore from a name and dictionary definition.""" 47 | return TableExplore(name, defn["views"], views_path) 48 | -------------------------------------------------------------------------------- /generator/lkml_update.py: -------------------------------------------------------------------------------- 1 | """An updated lkml parser to handle explore queries.""" 2 | 3 | from typing import List, Union 4 | 5 | from lkml.keys import KEYS_WITH_NAME_FIELDS 6 | from lkml.simple import DictParser 7 | from lkml.tree import BlockNode, DocumentNode, ListNode, PairNode 8 | 9 | 10 | def dump(obj: dict) -> str: 11 | """Dump an object as LookML.""" 12 | parser = UpdatedDictParser() 13 | tree: DocumentNode = parser.parse(obj) 14 | return str(tree) 15 | 16 | 17 | class UpdatedDictParser(DictParser): 18 | """An updated DictParser that properly handles queries.""" 19 | 20 | def parse_any( 21 | self, key: str, value: Union[str, list, tuple, dict] 22 | ) -> Union[ 23 | List[Union[BlockNode, ListNode, PairNode]], BlockNode, ListNode, PairNode 24 | ]: 25 | """Dynamically serializes a Python object based on its type. 26 | 27 | Args: 28 | key: A LookML field type (e.g. 
"suggestions" or "hidden") 29 | value: A string, tuple, or list to serialize 30 | Raises: 31 | TypeError: If input value is not of a valid type 32 | Returns: 33 | A generator of serialized string chunks 34 | """ 35 | if isinstance(value, str): 36 | return self.parse_pair(key, value) 37 | elif isinstance(value, (list, tuple)): 38 | if self.is_plural_key(key) and not self.parent_key == "query": 39 | # See https://github.com/joshtemple/lkml/issues/53 40 | # We check that the parent is not a query to ensure the 41 | # query fields don't get unnested 42 | return self.expand_list(key, value) 43 | else: 44 | return self.parse_list(key, value) 45 | elif isinstance(value, dict): 46 | if key in KEYS_WITH_NAME_FIELDS or "name" not in value.keys(): 47 | name = None 48 | else: 49 | name = value.pop("name") 50 | return self.parse_block(key, value, name) 51 | else: 52 | raise TypeError("Value must be a string, list, tuple, or dict.") 53 | -------------------------------------------------------------------------------- /generator/metrics_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for working with metric-hub.""" 2 | 3 | from typing import List, Optional 4 | 5 | from metric_config_parser.config import ConfigCollection 6 | from metric_config_parser.metric import MetricDefinition 7 | 8 | METRIC_HUB_REPO = "https://github.com/mozilla/metric-hub" 9 | LOOKER_METRIC_HUB_REPO = "https://github.com/mozilla/metric-hub/tree/main/looker" 10 | 11 | 12 | class _MetricsConfigLoader: 13 | """Loads metric config files from an external repository.""" 14 | 15 | config_collection: Optional[ConfigCollection] = None 16 | repos: List[str] = [METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO] 17 | 18 | @property 19 | def configs(self) -> ConfigCollection: 20 | configs = getattr(self, "_configs", None) 21 | if configs: 22 | return configs 23 | 24 | if self.config_collection is None: 25 | self.config_collection = ConfigCollection.from_github_repos(self.repos) 26 | self._configs = self.config_collection 27 | return self._configs 28 | 29 | def update_repos(self, repos: List[str]): 30 | """Change the repos to load configs from.""" 31 | self.repos = repos 32 | self.config_collection = None 33 | 34 | def metrics_of_data_source( 35 | self, data_source: str, namespace: str 36 | ) -> List[MetricDefinition]: 37 | """Get the metric definitions that use a specific data source.""" 38 | metrics = [] 39 | for definition in self.configs.definitions: 40 | if definition.platform == namespace: 41 | for _, metric_definition in definition.spec.metrics.definitions.items(): 42 | if ( 43 | metric_definition.data_source 44 | and metric_definition.data_source.name == data_source 45 | ): 46 | metrics.append(metric_definition) 47 | 48 | return metrics 49 | 50 | def data_sources_of_namespace(self, namespace: str) -> List[str]: 51 | """ 52 | Get the data source slugs in the specified namespace. 53 | 54 | Filter out data sources that are unused. 
55 | """ 56 | data_sources = [] 57 | for definition in self.configs.definitions: 58 | for data_source_slug in definition.spec.data_sources.definitions.keys(): 59 | if ( 60 | definition.platform == namespace 61 | and len( 62 | MetricsConfigLoader.metrics_of_data_source( 63 | data_source_slug, definition.platform 64 | ) 65 | ) 66 | > 0 67 | ): 68 | data_sources.append(data_source_slug) 69 | 70 | return data_sources 71 | 72 | 73 | MetricsConfigLoader = _MetricsConfigLoader() 74 | -------------------------------------------------------------------------------- /generator/operational_monitoring_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for operational monitoring.""" 2 | 3 | from multiprocessing.pool import ThreadPool 4 | from typing import Any, Dict, List, Optional, Tuple 5 | 6 | from google.api_core import exceptions 7 | from google.cloud import bigquery 8 | 9 | from .views import lookml_utils 10 | 11 | 12 | def _default_helper( 13 | bq_client: bigquery.Client, table: str, dimension: str 14 | ) -> Tuple[Optional[str], dict]: 15 | query_job = bq_client.query( 16 | f""" 17 | SELECT DISTINCT {dimension} AS option, COUNT(*) 18 | FROM {table} 19 | WHERE {dimension} IS NOT NULL 20 | GROUP BY 1 21 | ORDER BY 2 DESC 22 | LIMIT 10 23 | """ 24 | ) 25 | 26 | dimension_options = list(query_job.result()) 27 | 28 | if len(dimension_options) > 0: 29 | return dimension, { 30 | "default": dimension_options[0]["option"], 31 | "options": [d["option"] for d in dimension_options], 32 | } 33 | return None, {} 34 | 35 | 36 | def get_dimension_defaults( 37 | bq_client: bigquery.Client, table: str, dimensions: List[str] 38 | ) -> Dict[str, Any]: 39 | """ 40 | Find default values for certain dimensions. 41 | 42 | For a given Operational Monitoring dimension, find its default (most common) 43 | value and its top 10 most common to be used as dropdown options. 44 | """ 45 | with ThreadPool(4) as pool: 46 | return { 47 | key: value 48 | for key, value in pool.starmap( 49 | _default_helper, 50 | [[bq_client, table, dimension] for dimension in dimensions], 51 | ) 52 | if key is not None 53 | } 54 | 55 | 56 | def get_xaxis_val(table: str, dryrun) -> str: 57 | """ 58 | Return whether the x-axis should be build_id or submission_date. 59 | 60 | This is based on which one is found in the table provided. 
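    Illustrative example: a table whose generated dimensions include one
    named "build_id" yields "build_id"; any other table yields
    "submission_date".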
61 | """ 62 | all_dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) 63 | return ( 64 | "build_id" 65 | if "build_id" in {dimension["name"] for dimension in all_dimensions} 66 | else "submission_date" 67 | ) 68 | 69 | 70 | def get_active_projects( 71 | bq_client: bigquery.Client, project_table: str 72 | ) -> List[Dict[str, Any]]: 73 | """Select all operational monitoring projects.""" 74 | try: 75 | query_job = bq_client.query( 76 | f""" 77 | SELECT * 78 | FROM `{project_table}` 79 | WHERE 80 | end_date > CURRENT_DATE() OR 81 | end_date IS NULL 82 | """ 83 | ) 84 | 85 | projects = [dict(row) for row in query_job.result()] 86 | except exceptions.Forbidden: 87 | projects = [] 88 | return projects 89 | -------------------------------------------------------------------------------- /generator/spoke.py: -------------------------------------------------------------------------------- 1 | """Generate directories and models for new namespaces.""" 2 | 3 | import logging 4 | import os 5 | import shutil 6 | from collections import defaultdict 7 | from pathlib import Path 8 | from typing import Dict, List, TypedDict 9 | 10 | import click 11 | import lkml 12 | import looker_sdk 13 | import yaml 14 | 15 | from .lookml import ViewDict 16 | 17 | MODEL_SETS_BY_INSTANCE: Dict[str, List[str]] = { 18 | "https://mozilladev.cloud.looker.com": ["mozilla_confidential"], 19 | "https://mozillastaging.cloud.looker.com": ["mozilla_confidential"], 20 | "https://mozilla.cloud.looker.com": ["mozilla_confidential"], 21 | } 22 | 23 | DEFAULT_DB_CONNECTION = "telemetry" 24 | 25 | 26 | class ExploreDict(TypedDict): 27 | """Represent an explore definition.""" 28 | 29 | type: str 30 | views: List[Dict[str, str]] 31 | 32 | 33 | class NamespaceDict(TypedDict): 34 | """Represent a Namespace definition.""" 35 | 36 | views: ViewDict 37 | explores: ExploreDict 38 | pretty_name: str 39 | glean_app: bool 40 | connection: str 41 | spoke: str 42 | 43 | 44 | def setup_env_with_looker_creds() -> bool: 45 | """ 46 | Set up env with looker credentials. 47 | 48 | Returns TRUE if the config is complete. 49 | """ 50 | client_id = os.environ.get("LOOKER_API_CLIENT_ID") 51 | client_secret = os.environ.get("LOOKER_API_CLIENT_SECRET") 52 | instance = os.environ.get("LOOKER_INSTANCE_URI") 53 | 54 | if client_id is None or client_secret is None or instance is None: 55 | return False 56 | 57 | os.environ["LOOKERSDK_BASE_URL"] = instance 58 | os.environ["LOOKERSDK_API_VERSION"] = "4.0" 59 | os.environ["LOOKERSDK_VERIFY_SSL"] = "true" 60 | os.environ["LOOKERSDK_TIMEOUT"] = "120" 61 | os.environ["LOOKERSDK_CLIENT_ID"] = client_id 62 | os.environ["LOOKERSDK_CLIENT_SECRET"] = client_secret 63 | 64 | return True 65 | 66 | 67 | def generate_model( 68 | spoke_path: Path, name: str, namespace_defn: NamespaceDict, db_connection: str 69 | ) -> Path: 70 | """ 71 | Generate a model file for a namespace. 72 | 73 | We want these to have a nice label and a unique name. 74 | We only import explores and dashboards, as we want those 75 | to auto-import upon generation. 76 | 77 | Views are not imported by default, since they should 78 | be added one-by-one if they are included in an explore. 
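    Sketch of a generated model file (the "fenix" namespace and "Fenix" label
    are hypothetical; the include footer is assembled in the code below):

        connection: "telemetry"
        label: "Fenix"

        # Include files from looker-hub or spoke-default below. For example:
        include: "//looker-hub/fenix/explores/*"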
79 | """ 80 | logging.info(f"Generating model {name}...") 81 | model_defn = { 82 | "connection": db_connection, 83 | "label": namespace_defn["pretty_name"], 84 | } 85 | 86 | # automatically import generated explores for new glean apps 87 | has_explores = len(namespace_defn.get("explores", {})) > 0 88 | 89 | path = spoke_path / name / f"{name}.model.lkml" 90 | # lkml.dump may return None, in which case write an empty file 91 | footer_text = f""" 92 | # Include files from looker-hub or spoke-default below. For example: 93 | {'' if has_explores else '# '}include: "//looker-hub/{name}/explores/*" 94 | # include: "//looker-hub/{name}/dashboards/*" 95 | # include: "views/*" 96 | # include: "explores/*" 97 | # include: "dashboards/*" 98 | """ 99 | model_text = lkml.dump(model_defn) 100 | if model_text is None: 101 | path.write_text("") 102 | else: 103 | path.write_text(model_text + footer_text) 104 | 105 | return path 106 | 107 | 108 | def configure_model( 109 | sdk: looker_sdk.methods40.Looker40SDK, 110 | model_name: str, 111 | db_connection: str, 112 | spoke_project: str, 113 | ): 114 | """Configure a Looker model by name.""" 115 | instance = os.environ["LOOKER_INSTANCE_URI"] 116 | logging.info(f"Configuring model {model_name}...") 117 | 118 | try: 119 | sdk.lookml_model(model_name) 120 | logging.info("Model is configured!") 121 | return 122 | except looker_sdk.error.SDKError: 123 | pass 124 | 125 | sdk.create_lookml_model( 126 | looker_sdk.models40.WriteLookmlModel( 127 | allowed_db_connection_names=[db_connection], 128 | name=model_name, 129 | project_name=spoke_project, 130 | ) 131 | ) 132 | 133 | for model_set_name in MODEL_SETS_BY_INSTANCE[instance]: 134 | model_sets = sdk.search_model_sets(name=model_set_name) 135 | if len(model_sets) != 1: 136 | raise click.ClickException("Error: Expected exactly one matching model set") 137 | 138 | model_set = model_sets[0] 139 | models, _id = model_set.models, model_set.id 140 | if models is None or _id is None: 141 | raise click.ClickException("Error: Missing models or name from model_set") 142 | 143 | sdk.update_model_set( 144 | _id, looker_sdk.models40.WriteModelSet(models=list(models) + [model_name]) 145 | ) 146 | 147 | 148 | def generate_directories( 149 | namespaces: Dict[str, NamespaceDict], base_dir: Path, sdk_setup=False 150 | ): 151 | """Generate directories and model for a namespace, if it doesn't exist.""" 152 | seen_spoke_namespaces = defaultdict(list) 153 | for namespace, defn in namespaces.items(): 154 | spoke = defn["spoke"] 155 | seen_spoke_namespaces[spoke].append(namespace) 156 | 157 | spoke_dir = base_dir / spoke 158 | spoke_dir.mkdir(parents=True, exist_ok=True) 159 | print(f"Writing {namespace} to {spoke_dir}") 160 | existing_dirs = {p.name for p in spoke_dir.iterdir()} 161 | 162 | if namespace in existing_dirs: 163 | continue 164 | 165 | (spoke_dir / namespace).mkdir() 166 | for dirname in ("views", "explores", "dashboards"): 167 | (spoke_dir / namespace / dirname).mkdir() 168 | (spoke_dir / namespace / dirname / ".gitkeep").touch() 169 | 170 | db_connection: str = defn.get("connection", DEFAULT_DB_CONNECTION) 171 | generate_model(spoke_dir, namespace, defn, db_connection) 172 | 173 | if sdk_setup: 174 | spoke_project = spoke.removeprefix("looker-") 175 | sdk = looker_sdk.init40() 176 | logging.info("Looker SDK 4.0 initialized successfully.") 177 | configure_model(sdk, namespace, db_connection, spoke_project) 178 | 179 | # remove directories for namespaces that got removed 180 | for spoke in seen_spoke_namespaces.keys(): 181 | spoke_dir 
= base_dir / spoke 182 | existing_dirs = {p.name for p in spoke_dir.iterdir()} 183 | 184 | for existing_dir in existing_dirs: 185 | # make sure the directory belongs to a namespace by checking if a model file exists 186 | if (spoke_dir / existing_dir / f"{existing_dir}.model.lkml").is_file(): 187 | if existing_dir not in seen_spoke_namespaces[spoke]: 188 | # namespace does not exist anymore, remove its directory 189 | print(f"Removing {existing_dir} from {spoke_dir}") 190 | shutil.rmtree(spoke_dir / existing_dir) 191 | 192 | 193 | @click.command(help=__doc__) 194 | @click.option( 195 | "--namespaces", 196 | default="namespaces.yaml", 197 | type=click.File(), 198 | help="Path to the namespaces.yaml file.", 199 | ) 200 | @click.option( 201 | "--spoke-dir", 202 | default=".", 203 | type=click.Path(file_okay=False, dir_okay=True, writable=True), 204 | help="Directory containing the Looker spoke.", 205 | ) 206 | def update_spoke(namespaces, spoke_dir): 207 | """Generate updates to spoke project.""" 208 | _namespaces = yaml.safe_load(namespaces) 209 | sdk_setup = setup_env_with_looker_creds() 210 | generate_directories(_namespaces, Path(spoke_dir), sdk_setup) 211 | -------------------------------------------------------------------------------- /generator/utils.py: -------------------------------------------------------------------------------- 1 | """Utils.""" 2 | 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | LOOKER_HUB_URL = "https://raw.githubusercontent.com/mozilla/looker-hub/main" 7 | 8 | 9 | def get_file_from_looker_hub(path: Path): 10 | """Download a specific lookml artifact from looker-hub.""" 11 | file = path.name 12 | artifact_type = path.parent.name 13 | namespace = path.parent.parent.name 14 | print(f"{LOOKER_HUB_URL}/{namespace}/{artifact_type}/{file}") 15 | with urllib.request.urlopen( 16 | f"{LOOKER_HUB_URL}/{namespace}/{artifact_type}/{file}" 17 | ) as response: 18 | lookml = response.read().decode(response.headers.get_content_charset()) 19 | path.parent.mkdir(parents=True, exist_ok=True) 20 | path.write_text(lookml) 21 | -------------------------------------------------------------------------------- /generator/views/__init__.py: -------------------------------------------------------------------------------- 1 | """All available Looker views.""" 2 | 3 | from .client_counts_view import ClientCountsView 4 | from .events_view import EventsView 5 | from .funnel_analysis_view import FunnelAnalysisView 6 | from .glean_ping_view import GleanPingView 7 | from .growth_accounting_view import GrowthAccountingView 8 | from .metric_definitions_view import MetricDefinitionsView 9 | from .operational_monitoring_alerting_view import OperationalMonitoringAlertingView 10 | from .operational_monitoring_view import OperationalMonitoringView 11 | from .ping_view import PingView 12 | from .table_view import TableView 13 | from .view import View, ViewDict # noqa: F401 14 | 15 | VIEW_TYPES = { 16 | ClientCountsView.type: ClientCountsView, 17 | EventsView.type: EventsView, 18 | FunnelAnalysisView.type: FunnelAnalysisView, 19 | OperationalMonitoringView.type: OperationalMonitoringView, 20 | OperationalMonitoringAlertingView.type: OperationalMonitoringAlertingView, 21 | MetricDefinitionsView.type: MetricDefinitionsView, 22 | GleanPingView.type: GleanPingView, 23 | PingView.type: PingView, 24 | GrowthAccountingView.type: GrowthAccountingView, 25 | TableView.type: TableView, 26 | } 27 | --------------------------------------------------------------------------------
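The VIEW_TYPES registry above is what keeps namespace definitions declarative: consumers can dispatch on a view definition's `type` field instead of hard-coding each View subclass. A minimal sketch of that dispatch, assuming a hypothetical namespace fragment (the "fenix" namespace name and table are illustrative, not taken from this listing):

from generator.views import VIEW_TYPES

# Hypothetical fragment of a parsed namespaces.yaml "views" section.
view_definitions = {
    "client_counts": {
        "type": "client_counts_view",
        "tables": [{"table": "mozdata.fenix.baseline_clients_daily"}],
    },
}

for name, defn in view_definitions.items():
    # Resolve the concrete View subclass from its declared type ...
    view_class = VIEW_TYPES[defn["type"]]
    # ... and build it from the dictionary definition.
    view = view_class.from_dict("fenix", name, defn)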
/generator/views/client_counts_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Client Counts View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from typing import Any, Dict, Iterator, List, Optional, Union 7 | 8 | from .view import View, ViewDict 9 | 10 | 11 | class ClientCountsView(View): 12 | """A view for Client Counting measures.""" 13 | 14 | type: str = "client_counts_view" 15 | 16 | default_dimension_groups: List[Dict[str, Union[str, List[str]]]] = [ 17 | { 18 | "name": "since_first_seen", 19 | "type": "duration", 20 | "description": "Amount of time that has passed since the client was first seen.", 21 | "sql_start": "CAST(${TABLE}.first_seen_date AS TIMESTAMP)", 22 | "sql_end": "CAST(${TABLE}.submission_date AS TIMESTAMP)", 23 | "intervals": ["day", "week", "month", "year"], 24 | } 25 | ] 26 | 27 | default_dimensions: List[Dict[str, str]] = [ 28 | { 29 | "name": "have_completed_period", 30 | "type": "yesno", 31 | "description": "Only for use with cohort analysis. " 32 | "Filter on true to remove the tail of incomplete data from cohorts. " 33 | "Indicates whether the clients in the cohort for this row have all had a chance to complete this interval. " 34 | "For example, new clients from yesterday have not all had a chance to send a ping for today.", 35 | "sql": """ 36 | DATE_ADD( 37 | {% if client_counts.first_seen_date._is_selected %} 38 | DATE_ADD(DATE(${client_counts.first_seen_date}), INTERVAL 1 DAY) 39 | {% elsif client_counts.first_seen_week._is_selected %} 40 | DATE_ADD(DATE(${client_counts.first_seen_week}), INTERVAL 1 WEEK) 41 | {% elsif client_counts.first_seen_month._is_selected %} 42 | DATE_ADD(PARSE_DATE('%Y-%m', ${client_counts.first_seen_month}), INTERVAL 1 MONTH) 43 | {% elsif client_counts.first_seen_year._is_selected %} 44 | DATE_ADD(DATE(${client_counts.first_seen_year}, 1, 1), INTERVAL 1 YEAR) 45 | {% endif %} 46 | , 47 | {% if client_counts.days_since_first_seen._is_selected %} 48 | INTERVAL CAST(${client_counts.days_since_first_seen} AS INT64) DAY 49 | {% elsif client_counts.weeks_since_first_seen._is_selected %} 50 | INTERVAL CAST(${client_counts.weeks_since_first_seen} AS INT64) WEEK 51 | {% elsif client_counts.months_since_first_seen._is_selected %} 52 | INTERVAL CAST(${client_counts.months_since_first_seen} AS INT64) MONTH 53 | {% elsif client_counts.years_since_first_seen._is_selected %} 54 | INTERVAL CAST(${client_counts.years_since_first_seen} AS INT64) YEAR 55 | {% endif %} 56 | ) < current_date 57 | """, 58 | } 59 | ] 60 | 61 | default_measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [ 62 | { 63 | "name": "client_count", 64 | "type": "number", 65 | "description": "The number of clients, " 66 | "determined by whether they sent a baseline ping on the day in question.", 67 | "sql": "COUNT(DISTINCT ${TABLE}.client_id)", 68 | } 69 | ] 70 | 71 | def __init__( 72 | self, 73 | namespace: str, 74 | tables: List[Dict[str, str]], 75 | name: str = "client_counts", 76 | ): 77 | """Get an instance of a ClientCountsView.""" 78 | super().__init__(namespace, name, ClientCountsView.type, tables) 79 | 80 | @classmethod 81 | def from_db_views( 82 | klass, 83 | namespace: str, 84 | is_glean: bool, 85 | channels: List[Dict[str, str]], 86 | db_views: dict, 87 | ) -> Iterator[ClientCountsView]: 88 | """Get Client Count Views from db views and app variants.""" 89 | # We can guarantee there will always be at least one channel, 90 | # because this comes from the associated
_get_glean_repos in 91 | # namespaces.py 92 | dataset = next( 93 | (channel for channel in channels if channel.get("channel") == "release"), 94 | channels[0], 95 | )["dataset"] 96 | 97 | for view_id, references in db_views[dataset].items(): 98 | if view_id == "baseline_clients_daily" or view_id == "clients_daily": 99 | yield ClientCountsView( 100 | namespace, [{"table": f"mozdata.{dataset}.{view_id}"}] 101 | ) 102 | 103 | @classmethod 104 | def from_dict( 105 | klass, namespace: str, name: str, _dict: ViewDict 106 | ) -> ClientCountsView: 107 | """Get a view from a name and dict definition.""" 108 | return ClientCountsView(namespace, _dict["tables"], name) 109 | 110 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 111 | """Generate LookML for this view.""" 112 | table = self.tables[0]["table"] 113 | 114 | base_view = "baseline_clients_daily_table" 115 | if table is not None: 116 | base_view = table.split(".")[-1] + "_table" 117 | 118 | view_defn: Dict[str, Any] = { 119 | "extends": [base_view], 120 | "name": self.name, 121 | } 122 | 123 | # add dimensions and dimension groups 124 | view_defn["dimensions"] = deepcopy(ClientCountsView.default_dimensions) 125 | view_defn["dimension_groups"] = deepcopy( 126 | ClientCountsView.default_dimension_groups 127 | ) 128 | 129 | # add measures 130 | view_defn["measures"] = self.get_measures() 131 | 132 | return { 133 | "includes": [base_view + ".view.lkml"], 134 | "views": [view_defn], 135 | } 136 | 137 | def get_measures(self) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 138 | """Generate measures for Client Counts.""" 139 | return deepcopy(ClientCountsView.default_measures) 140 | -------------------------------------------------------------------------------- /generator/views/datagroups.py: -------------------------------------------------------------------------------- 1 | """Generate datagroup lkml files for each namespace.""" 2 | 3 | import logging 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | from typing import Any, List, Optional 7 | 8 | import lkml 9 | 10 | from generator.dryrun import DryRunError, Errors 11 | from generator.namespaces import DEFAULT_GENERATED_SQL_URI 12 | from generator.utils import get_file_from_looker_hub 13 | from generator.views import View, lookml_utils 14 | from generator.views.lookml_utils import BQViewReferenceMap 15 | 16 | DEFAULT_MAX_CACHE_AGE = "24 hours" 17 | 18 | SQL_TRIGGER_TEMPLATE_SINGLE_TABLE = """ 19 | SELECT MAX(storage_last_modified_time) AS storage_last_modified_time 20 | FROM `{project_id}`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE 21 | WHERE {table} 22 | """ 23 | 24 | SQL_TRIGGER_TEMPLATE_ALL_TABLES = """ 25 | SELECT MAX(storage_last_modified_time) 26 | FROM ( 27 | {tables} 28 | ) 29 | """ 30 | 31 | # To map views to their underlying tables: 32 | DATASET_VIEW_MAP = lookml_utils.get_bigquery_view_reference_map( 33 | DEFAULT_GENERATED_SQL_URI 34 | ) 35 | 36 | FILE_HEADER = """# *Do not manually modify this file* 37 | 38 | # This file has been generated via https://github.com/mozilla/lookml-generator 39 | 40 | # Using a datagroup in an Explore: https://cloud.google.com/looker/docs/reference/param-explore-persist-with 41 | # Using a datagroup in a derived table: https://cloud.google.com/looker/docs/reference/param-view-datagroup-trigger 42 | 43 | """ 44 | 45 | 46 | @dataclass(frozen=True, eq=True) 47 | class Datagroup: 48 | """Represents a Datagroup.""" 49 | 50 | name: str 51 | label: str 52 | sql_trigger: str 53 | description: 
str 54 | max_cache_age: str = DEFAULT_MAX_CACHE_AGE 55 | 56 | def __str__(self) -> str: 57 | """Return the LookML string representation of a Datagroup.""" 58 | return lkml.dump({"datagroups": [self.__dict__]}) # type: ignore 59 | 60 | def __lt__(self, other) -> bool: 61 | """Make datagroups sortable.""" 62 | return self.name < other.name 63 | 64 | 65 | def _get_datagroup_from_bigquery_tables( 66 | project_id, tables, view: View 67 | ) -> Optional[Datagroup]: 68 | """Use template and default values to create a Datagroup from BQ tables.""" 69 | if len(tables) == 0: 70 | return None 71 | 72 | datagroup_tables = [] 73 | for table in tables: 74 | dataset_id = table[1] 75 | table_id = table[2] 76 | 77 | datagroup_tables.append( 78 | SQL_TRIGGER_TEMPLATE_SINGLE_TABLE.format( 79 | project_id=table[0], 80 | table=f"(table_schema = '{dataset_id}' AND table_name = '{table_id}')", 81 | ) 82 | ) 83 | 84 | # create a datagroup associated with a view which will be used for caching 85 | return Datagroup( 86 | name=f"{view.name}_last_updated", 87 | label=f"{view.name} Last Updated", 88 | description=f"Updates for {view.name} when referenced tables are modified.", 89 | sql_trigger=SQL_TRIGGER_TEMPLATE_ALL_TABLES.format( 90 | project_id=project_id, tables=" UNION ALL ".join(datagroup_tables) 91 | ), 92 | ) 93 | 94 | 95 | def _get_datagroup_from_bigquery_view( 96 | project_id, 97 | dataset_id, 98 | table_id, 99 | dataset_view_map: BQViewReferenceMap, 100 | view: View, 101 | ) -> Optional[Datagroup]: 102 | # Dataset view map only contains references for shared-prod views. 103 | full_table_id = f"{project_id}.{dataset_id}.{table_id}" 104 | 105 | view_references = _get_referenced_tables( 106 | project_id, dataset_id, table_id, dataset_view_map, [] 107 | ) 108 | 109 | if not view_references or len(view_references) == 0: 110 | # Some views might not reference a source table 111 | logging.debug(f"Unable to find a source for {full_table_id} in generated-sql.") 112 | return None 113 | 114 | return _get_datagroup_from_bigquery_tables(project_id, view_references, view) 115 | 116 | 117 | def _get_referenced_tables( 118 | project_id, 119 | dataset_id, 120 | table_id, 121 | dataset_view_map: BQViewReferenceMap, 122 | seen: List[List[str]], 123 | ) -> List[List[str]]: 124 | """ 125 | Return a list of all tables referenced by the provided view. 126 | 127 | Recursively resolve references of referenced views to get only table dependencies. 
128 | """ 129 | if [project_id, dataset_id, table_id] in seen: 130 | return [[project_id, dataset_id, table_id]] 131 | 132 | seen += [[project_id, dataset_id, table_id]] 133 | 134 | dataset_view_references = dataset_view_map.get(dataset_id) 135 | 136 | if dataset_view_references is None: 137 | return [[project_id, dataset_id, table_id]] 138 | 139 | view_references = dataset_view_references.get(table_id) 140 | if view_references is None: 141 | return [[project_id, dataset_id, table_id]] 142 | 143 | return [ 144 | ref 145 | for view_reference in view_references 146 | for ref in _get_referenced_tables( 147 | view_reference[0], 148 | view_reference[1], 149 | view_reference[2], 150 | dataset_view_map, 151 | seen.copy(), 152 | ) 153 | if view_reference not in seen 154 | ] 155 | 156 | 157 | def _generate_view_datagroup( 158 | view: View, 159 | dataset_view_map: BQViewReferenceMap, 160 | dryrun, 161 | ) -> Optional[Datagroup]: 162 | """Generate the Datagroup LookML for a Looker View.""" 163 | if len(view.tables) == 0: 164 | return None 165 | 166 | # Use the release channel table or the first available table (usually the only one): 167 | view_tables = next( 168 | (table for table in view.tables if table.get("channel") == "release"), 169 | view.tables[0], 170 | ) 171 | 172 | if "table" not in view_tables: 173 | return None 174 | 175 | view_table = view_tables["table"] 176 | 177 | [project, dataset, table] = view_table.split(".") 178 | table_metadata = dryrun.create( 179 | project=project, 180 | dataset=dataset, 181 | table=table, 182 | ).get_table_metadata() 183 | 184 | if "TABLE" == table_metadata.get("tableType"): 185 | datagroups = _get_datagroup_from_bigquery_tables( 186 | project, [[project, dataset, table]], view 187 | ) 188 | return datagroups 189 | elif "VIEW" == table_metadata.get("tableType"): 190 | datagroups = _get_datagroup_from_bigquery_view( 191 | project, dataset, table, dataset_view_map, view 192 | ) 193 | return datagroups 194 | 195 | return None 196 | 197 | 198 | def generate_datagroup( 199 | view: View, 200 | target_dir: Path, 201 | namespace: str, 202 | dryrun, 203 | ) -> Any: 204 | """Generate and write a datagroups.lkml file to the namespace folder.""" 205 | datagroups_folder_path = target_dir / namespace / "datagroups" 206 | 207 | datagroup = None 208 | try: 209 | datagroup = _generate_view_datagroup(view, DATASET_VIEW_MAP, dryrun) 210 | except DryRunError as e: 211 | if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function: 212 | path = datagroups_folder_path / f"{e.table_id}_last_updated.datagroup.lkml" 213 | print( 214 | f"Permission error dry running: {path}. Copy existing file from looker-hub." 
215 | ) 216 | try: 217 | get_file_from_looker_hub(path) 218 | except Exception as ex: 219 | print(f"Skipping datagroup generation for {path}: {ex}") 220 | else: 221 | raise 222 | 223 | datagroup_paths = [] 224 | if datagroup: 225 | datagroups_folder_path.mkdir(exist_ok=True) 226 | datagroup_lkml_path = ( 227 | datagroups_folder_path / f"{datagroup.name}.datagroup.lkml" 228 | ) 229 | datagroup_lkml_path.write_text(FILE_HEADER + str(datagroup)) 230 | datagroup_paths.append(datagroup_lkml_path) 231 | 232 | return datagroup_paths 233 | -------------------------------------------------------------------------------- /generator/views/events_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Events view.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from . import lookml_utils 9 | from .view import View, ViewDict 10 | 11 | 12 | class EventsView(View): 13 | """A view for querying events data, with one row per event.""" 14 | 15 | type: str = "events_view" 16 | 17 | default_measures: List[Dict[str, str]] = [ 18 | { 19 | "name": "event_count", 20 | "type": "count", 21 | "description": ("The number of times the event(s) occurred."), 22 | }, 23 | ] 24 | 25 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 26 | """Get an instance of an EventsView.""" 27 | super().__init__(namespace, name, EventsView.type, tables) 28 | 29 | @classmethod 30 | def from_db_views( 31 | klass, 32 | namespace: str, 33 | is_glean: bool, 34 | channels: List[Dict[str, str]], 35 | db_views: dict, 36 | ) -> Iterator[EventsView]: 37 | """Get Events Views from db views and app variants.""" 38 | # We can guarantee there will always be at least one channel, 39 | # because this comes from the associated _get_glean_repos in 40 | # namespaces.py 41 | dataset = next( 42 | (channel for channel in channels if channel.get("channel") == "release"), 43 | channels[0], 44 | )["dataset"] 45 | 46 | for view_id, references in db_views[dataset].items(): 47 | if view_id == "events_unnested": 48 | yield EventsView( 49 | namespace, 50 | "events", 51 | [ 52 | { 53 | "events_table_view": "events_unnested_table", 54 | "base_table": f"mozdata.{dataset}.{view_id}", 55 | } 56 | ], 57 | ) 58 | 59 | @classmethod 60 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> EventsView: 61 | """Get a view from a name and dict definition.""" 62 | return EventsView(namespace, name, _dict["tables"]) 63 | 64 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 65 | """Generate LookML for this view.""" 66 | view_defn: Dict[str, Any] = { 67 | "extends": [self.tables[0]["events_table_view"]], 68 | "name": self.name, 69 | } 70 | 71 | # add measures 72 | dimensions = lookml_utils._generate_dimensions( 73 | self.tables[0]["base_table"], dryrun=dryrun 74 | ) 75 | view_defn["measures"] = self.get_measures(dimensions) 76 | 77 | # set event_id as primary key if it exists in the underlying table 78 | # this will allow one_to_many joins 79 | event_id_dimension = self.generate_event_id_dimension(dimensions) 80 | if event_id_dimension is not None: 81 | view_defn["dimensions"] = [event_id_dimension] 82 | 83 | return { 84 | "includes": [f"{self.tables[0]['events_table_view']}.view.lkml"], 85 | "views": [view_defn], 86 | } 87 | 88 | def get_measures(self, dimensions) -> List[Dict[str, str]]: 89 | """Generate measures for Events Views.""" 90 | measures = 
deepcopy(EventsView.default_measures) 91 | client_id_field = self.get_client_id(dimensions, "events") 92 | if client_id_field is not None: 93 | measures.append( 94 | { 95 | "name": "client_count", 96 | "type": "count_distinct", 97 | "sql": f"${{{client_id_field}}}", 98 | "description": ( 99 | "The number of clients that completed the event(s)." 100 | ), 101 | } 102 | ) 103 | 104 | return measures 105 | 106 | def generate_event_id_dimension( 107 | self, dimensions: list[dict] 108 | ) -> Optional[Dict[str, str]]: 109 | """Generate the event_id dimension to be used as a primary key for a one-to-many join.""" 110 | event_id = self.select_dimension("event_id", dimensions, "events") 111 | if event_id: 112 | return { 113 | "name": "event_id", 114 | "primary_key": "yes", 115 | } 116 | return None 117 | -------------------------------------------------------------------------------- /generator/views/growth_accounting_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Growth Accounting View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from itertools import filterfalse 7 | from typing import Any, Dict, Iterator, List, Optional, Union 8 | 9 | from . import lookml_utils 10 | from .view import View, ViewDict 11 | 12 | 13 | class GrowthAccountingView(View): 14 | """A view for growth accounting measures.""" 15 | 16 | type: str = "growth_accounting_view" 17 | DEFAULT_IDENTIFIER_FIELD: str = "client_id" 18 | 19 | other_dimensions: List[Dict[str, str]] = [ 20 | { 21 | "name": "first", 22 | "sql": "${TABLE}.first", 23 | "type": "yesno", 24 | "hidden": "yes", 25 | } 26 | ] 27 | 28 | default_measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [ 29 | { 30 | "name": "overall_active_previous", 31 | "type": "count", 32 | "filters": [{"active_last_week": "yes"}], 33 | }, 34 | { 35 | "name": "overall_active_current", 36 | "type": "count", 37 | "filters": [{"active_this_week": "yes"}], 38 | }, 39 | { 40 | "name": "overall_resurrected", 41 | "type": "count", 42 | "filters": [ 43 | {"new_last_week": "no"}, 44 | {"new_this_week": "no"}, 45 | {"active_last_week": "no"}, 46 | {"active_this_week": "yes"}, 47 | ], 48 | }, 49 | { 50 | "name": "new_users", 51 | "type": "count", 52 | "filters": [{"new_this_week": "yes"}, {"active_this_week": "yes"}], 53 | }, 54 | { 55 | "name": "established_users_returning", 56 | "type": "count", 57 | "filters": [ 58 | {"new_last_week": "no"}, 59 | {"new_this_week": "no"}, 60 | {"active_last_week": "yes"}, 61 | {"active_this_week": "yes"}, 62 | ], 63 | }, 64 | { 65 | "name": "new_users_returning", 66 | "type": "count", 67 | "filters": [ 68 | {"new_last_week": "yes"}, 69 | {"active_last_week": "yes"}, 70 | {"active_this_week": "yes"}, 71 | ], 72 | }, 73 | { 74 | "name": "new_users_churned_count", 75 | "type": "count", 76 | "filters": [ 77 | {"new_last_week": "yes"}, 78 | {"active_last_week": "yes"}, 79 | {"active_this_week": "no"}, 80 | ], 81 | }, 82 | { 83 | "name": "established_users_churned_count", 84 | "type": "count", 85 | "filters": [ 86 | {"new_last_week": "no"}, 87 | {"new_this_week": "no"}, 88 | {"active_last_week": "yes"}, 89 | {"active_this_week": "no"}, 90 | ], 91 | }, 92 | { 93 | "name": "new_users_churned", 94 | "type": "number", 95 | "sql": "-1 * ${new_users_churned_count}", 96 | }, 97 | { 98 | "name": "established_users_churned", 99 | "type": "number", 100 | "sql": "-1 * ${established_users_churned_count}", 101 | }, 102 | { 103 | "name": "overall_churned", 104 | 
"type": "number", 105 | "sql": "${new_users_churned} + ${established_users_churned}", 106 | }, 107 | { 108 | "name": "overall_retention_rate", 109 | "type": "number", 110 | "sql": ( 111 | "SAFE_DIVIDE(" 112 | "(${established_users_returning} + ${new_users_returning})," 113 | "${overall_active_previous}" 114 | ")" 115 | ), 116 | }, 117 | { 118 | "name": "established_user_retention_rate", 119 | "type": "number", 120 | "sql": ( 121 | "SAFE_DIVIDE(" 122 | "${established_users_returning}," 123 | "(${established_users_returning} + ${established_users_churned_count})" 124 | ")" 125 | ), 126 | }, 127 | { 128 | "name": "new_user_retention_rate", 129 | "type": "number", 130 | "sql": ( 131 | "SAFE_DIVIDE(" 132 | "${new_users_returning}," 133 | "(${new_users_returning} + ${new_users_churned_count})" 134 | ")" 135 | ), 136 | }, 137 | { 138 | "name": "overall_churn_rate", 139 | "type": "number", 140 | "sql": ( 141 | "SAFE_DIVIDE(" 142 | "(${established_users_churned_count} + ${new_users_churned_count})," 143 | "${overall_active_previous}" 144 | ")" 145 | ), 146 | }, 147 | { 148 | "name": "fraction_of_active_resurrected", 149 | "type": "number", 150 | "sql": "SAFE_DIVIDE(${overall_resurrected}, ${overall_active_current})", 151 | }, 152 | { 153 | "name": "fraction_of_active_new", 154 | "type": "number", 155 | "sql": "SAFE_DIVIDE(${new_users}, ${overall_active_current})", 156 | }, 157 | { 158 | "name": "fraction_of_active_established_returning", 159 | "type": "number", 160 | "sql": ( 161 | "SAFE_DIVIDE(" 162 | "${established_users_returning}," 163 | "${overall_active_current}" 164 | ")" 165 | ), 166 | }, 167 | { 168 | "name": "fraction_of_active_new_returning", 169 | "type": "number", 170 | "sql": "SAFE_DIVIDE(${new_users_returning}, ${overall_active_current})", 171 | }, 172 | { 173 | "name": "quick_ratio", 174 | "type": "number", 175 | "sql": ( 176 | "SAFE_DIVIDE(" 177 | "${new_users} + ${overall_resurrected}," 178 | "${established_users_churned_count} + ${new_users_churned_count}" 179 | ")" 180 | ), 181 | }, 182 | ] 183 | 184 | def __init__( 185 | self, 186 | namespace: str, 187 | tables: List[Dict[str, str]], 188 | identifier_field: str = DEFAULT_IDENTIFIER_FIELD, 189 | ): 190 | """Get an instance of a GrowthAccountingView.""" 191 | self.identifier_field = identifier_field 192 | 193 | super().__init__( 194 | namespace, "growth_accounting", GrowthAccountingView.type, tables 195 | ) 196 | 197 | @classmethod 198 | def get_default_dimensions( 199 | klass, identifier_field: str = DEFAULT_IDENTIFIER_FIELD 200 | ) -> List[Dict[str, str]]: 201 | """Get dimensions to be added to GrowthAccountingView by default.""" 202 | return [ 203 | { 204 | "name": "active_this_week", 205 | "sql": "mozfun.bits28.active_in_range(days_seen_bits, -6, 7)", 206 | "type": "yesno", 207 | "hidden": "yes", 208 | }, 209 | { 210 | "name": "active_last_week", 211 | "sql": "mozfun.bits28.active_in_range(days_seen_bits, -13, 7)", 212 | "type": "yesno", 213 | "hidden": "yes", 214 | }, 215 | { 216 | "name": "new_this_week", 217 | "sql": "DATE_DIFF(${submission_date}, first_run_date, DAY) BETWEEN 0 AND 6", 218 | "type": "yesno", 219 | "hidden": "yes", 220 | }, 221 | { 222 | "name": "new_last_week", 223 | "sql": "DATE_DIFF(${submission_date}, first_run_date, DAY) BETWEEN 7 AND 13", 224 | "type": "yesno", 225 | "hidden": "yes", 226 | }, 227 | { 228 | "name": f"{identifier_field}_day", 229 | "sql": f"CONCAT(CAST(${{TABLE}}.submission_date AS STRING), ${{{identifier_field}}})", 230 | "type": "string", 231 | "hidden": "yes", 232 | "primary_key": 
"yes", 233 | }, 234 | ] 235 | 236 | @classmethod 237 | def from_db_views( 238 | klass, 239 | namespace: str, 240 | is_glean: bool, 241 | channels: List[Dict[str, str]], 242 | db_views: dict, 243 | identifier_field: str = DEFAULT_IDENTIFIER_FIELD, 244 | ) -> Iterator[GrowthAccountingView]: 245 | """Get Growth Accounting Views from db views and app variants.""" 246 | dataset = next( 247 | (channel for channel in channels if channel.get("channel") == "release"), 248 | channels[0], 249 | )["dataset"] 250 | 251 | for view_id, references in db_views[dataset].items(): 252 | if view_id == "baseline_clients_last_seen": 253 | yield GrowthAccountingView( 254 | namespace, 255 | [{"table": f"mozdata.{dataset}.{view_id}"}], 256 | identifier_field=identifier_field, 257 | ) 258 | 259 | @classmethod 260 | def from_dict( 261 | klass, namespace: str, name: str, _dict: ViewDict 262 | ) -> GrowthAccountingView: 263 | """Get a view from a name and dict definition.""" 264 | return GrowthAccountingView( 265 | namespace, 266 | _dict["tables"], 267 | identifier_field=str( 268 | _dict.get( 269 | "identifier_field", GrowthAccountingView.DEFAULT_IDENTIFIER_FIELD 270 | ) 271 | ), 272 | ) 273 | 274 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 275 | """Generate LookML for this view.""" 276 | view_defn: Dict[str, Any] = {"name": self.name} 277 | table = self.tables[0]["table"] 278 | 279 | # add dimensions and dimension groups 280 | dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) + deepcopy( 281 | GrowthAccountingView.get_default_dimensions( 282 | identifier_field=self.identifier_field 283 | ) 284 | ) 285 | 286 | view_defn["dimensions"] = list( 287 | filterfalse(lookml_utils._is_dimension_group, dimensions) 288 | ) 289 | view_defn["dimension_groups"] = list( 290 | filter(lookml_utils._is_dimension_group, dimensions) 291 | ) 292 | 293 | # add measures 294 | view_defn["measures"] = self.get_measures() 295 | 296 | # SQL Table Name 297 | view_defn["sql_table_name"] = f"`{table}`" 298 | 299 | return {"views": [view_defn]} 300 | 301 | def get_measures(self) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 302 | """Generate measures for the Growth Accounting Framework.""" 303 | return deepcopy(GrowthAccountingView.default_measures) 304 | -------------------------------------------------------------------------------- /generator/views/operational_monitoring_alerting_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Operational Monitoring Alert View.""" 2 | 3 | from typing import Any, Dict, Optional 4 | 5 | from . 
import lookml_utils 6 | from .operational_monitoring_view import OperationalMonitoringView 7 | 8 | 9 | class OperationalMonitoringAlertingView(OperationalMonitoringView): 10 | """A view on an alerting operational monitoring table.""" 11 | 12 | type: str = "operational_monitoring_alerting_view" 13 | 14 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 15 | """Get this view as LookML.""" 16 | if len(self.tables) == 0: 17 | raise Exception(f"Operational Monitoring view {self.name} has no tables") 18 | 19 | reference_table = self.tables[0]["table"] 20 | dimensions = [ 21 | d 22 | for d in lookml_utils._generate_dimensions(reference_table, dryrun=dryrun) 23 | if d["name"] != "submission" 24 | ] 25 | 26 | dimensions.append( 27 | { 28 | "name": "submission_date", 29 | "type": "date", 30 | "sql": "${TABLE}.submission_date", 31 | "datatype": "date", 32 | "convert_tz": "no", 33 | } 34 | ) 35 | 36 | dimensions.append( 37 | { 38 | "name": "build_id_date", 39 | "type": "date", 40 | "hidden": "yes", 41 | "sql": "PARSE_DATE('%Y%m%d', CAST(${TABLE}.build_id AS STRING))", 42 | "datatype": "date", 43 | "convert_tz": "no", 44 | } 45 | ) 46 | 47 | return { 48 | "views": [ 49 | { 50 | "name": self.name, 51 | "sql_table_name": f"`{reference_table}`", 52 | "dimensions": dimensions, 53 | "measures": [ 54 | {"name": "errors", "type": "number", "sql": "COUNT(*)"} 55 | ], 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /generator/views/operational_monitoring_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Operational Monitoring View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List, Optional, Union 6 | 7 | from . 
import lookml_utils 8 | from .ping_view import PingView 9 | from .view import ViewDict 10 | 11 | ALLOWED_DIMENSIONS = { 12 | "branch", 13 | "metric", 14 | "statistic", 15 | "parameter", 16 | } 17 | 18 | 19 | class OperationalMonitoringView(PingView): 20 | """A view on an operational monitoring table.""" 21 | 22 | type: str = "operational_monitoring_view" 23 | 24 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 25 | """Create instance of an OperationalMonitoringView.""" 26 | super().__init__(namespace, name, tables) 27 | xaxis = "build_id" 28 | if len(tables) > 0 and "xaxis" in tables[0]: 29 | xaxis = tables[0]["xaxis"] 30 | 31 | xaxis_to_sql_mapping = { 32 | "build_id": f"PARSE_DATE('%Y%m%d', CAST(${{TABLE}}.{xaxis} AS STRING))", 33 | "submission_date": f"${{TABLE}}.{xaxis}", 34 | } 35 | self.dimensions: List[Dict[str, str]] = [ 36 | { 37 | "name": xaxis, 38 | "type": "date", 39 | "sql": xaxis_to_sql_mapping[xaxis], 40 | "datatype": "date", 41 | "convert_tz": "no", 42 | } 43 | ] 44 | 45 | @classmethod 46 | def from_dict( 47 | klass, namespace: str, name: str, _dict: ViewDict 48 | ) -> OperationalMonitoringView: 49 | """Get an OperationalMonitoringView from a dict representation.""" 50 | return klass(namespace, name, _dict["tables"]) 51 | 52 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 53 | """Get this view as LookML.""" 54 | if len(self.tables) == 0: 55 | raise Exception(f"Operational Monitoring view {self.name} has no tables") 56 | 57 | reference_table = self.tables[0]["table"] 58 | all_dimensions = lookml_utils._generate_dimensions( 59 | reference_table, dryrun=dryrun 60 | ) 61 | 62 | filtered_dimensions = [ 63 | d 64 | for d in all_dimensions 65 | if d["name"] in ALLOWED_DIMENSIONS 66 | or d["name"] in self.tables[0].get("dimensions", {}).keys() 67 | ] 68 | self.dimensions.extend(filtered_dimensions) 69 | 70 | return { 71 | "views": [ 72 | { 73 | "name": self.name, 74 | "sql_table_name": reference_table, 75 | "dimensions": self.dimensions, 76 | "measures": self.get_measures( 77 | self.dimensions, reference_table, v1_name 78 | ), 79 | } 80 | ] 81 | } 82 | 83 | def get_measures( 84 | self, dimensions: List[dict], table: str, v1_name: Optional[str] 85 | ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 86 | """Get OpMon measures.""" 87 | return [ 88 | {"name": "point", "type": "sum", "sql": "${TABLE}.point"}, 89 | {"name": "upper", "type": "sum", "sql": "${TABLE}.upper"}, 90 | {"name": "lower", "type": "sum", "sql": "${TABLE}.lower"}, 91 | ] 92 | -------------------------------------------------------------------------------- /generator/views/ping_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Ping View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from collections import defaultdict 6 | from typing import Any, Dict, Iterator, List, Optional, Union 7 | 8 | from . 
import lookml_utils 9 | from .view import OMIT_VIEWS, View, ViewDict 10 | 11 | 12 | class PingView(View): 13 | """A view on a ping table.""" 14 | 15 | type: str = "ping_view" 16 | allow_glean: bool = False 17 | 18 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 19 | """Create instance of a PingView.""" 20 | super().__init__(namespace, name, self.__class__.type, tables) 21 | 22 | @classmethod 23 | def from_db_views( 24 | klass, 25 | namespace: str, 26 | is_glean: bool, 27 | channels: List[Dict[str, str]], 28 | db_views: dict, 29 | ) -> Iterator[PingView]: 30 | """Get Looker views for a namespace.""" 31 | if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean): 32 | return 33 | 34 | view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 35 | for channel in channels: 36 | dataset = channel["dataset"] 37 | 38 | for view_id, references in db_views[dataset].items(): 39 | if view_id in OMIT_VIEWS: 40 | continue 41 | 42 | table_id = f"mozdata.{dataset}.{view_id}" 43 | table: Dict[str, str] = {"table": table_id} 44 | if channel.get("channel") is not None: 45 | table["channel"] = channel["channel"] 46 | 47 | # Only include those that select from a single ping source table 48 | # or union together multiple ping source tables of the same name. 49 | reference_table_names = set(r[-1] for r in references) 50 | reference_dataset_names = set(r[-2] for r in references) 51 | if ( 52 | len(reference_table_names) != 1 53 | or channel["source_dataset"] not in reference_dataset_names 54 | ): 55 | continue 56 | 57 | view_tables[view_id][table_id] = table 58 | 59 | for view_id, tables_by_id in view_tables.items(): 60 | yield klass(namespace, view_id, list(tables_by_id.values())) 61 | 62 | @classmethod 63 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView: 64 | """Get a view from a name and dict definition.""" 65 | return klass(namespace, name, _dict["tables"]) 66 | 67 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 68 | """Generate LookML for this view.""" 69 | view_defn: Dict[str, Any] = {"name": self.name} 70 | 71 | # use schema for the table where channel=="release" or the first one 72 | table = next( 73 | (table for table in self.tables if table.get("channel") == "release"), 74 | self.tables[0], 75 | )["table"] 76 | 77 | dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun) 78 | 79 | # set document id field as a primary key for joins 80 | view_defn["dimensions"] = [ 81 | d if d["name"] != "document_id" else dict(**d, primary_key="yes") 82 | for d in dimensions 83 | if not lookml_utils._is_dimension_group(d) 84 | ] 85 | view_defn["dimension_groups"] = [ 86 | d for d in dimensions if lookml_utils._is_dimension_group(d) 87 | ] 88 | 89 | # add measures 90 | view_defn["measures"] = self.get_measures(dimensions, table, v1_name) 91 | 92 | [project, dataset, table_id] = table.split(".") 93 | table_schema = dryrun.create( 94 | project=project, 95 | dataset=dataset, 96 | table=table_id, 97 | ).get_table_schema() 98 | nested_views = lookml_utils._generate_nested_dimension_views( 99 | table_schema, self.name 100 | ) 101 | 102 | # Round-tripping through a dict to get an ordered deduped list. 
103 | suggestions = list( 104 | dict.fromkeys( 105 | _table["channel"] for _table in self.tables if "channel" in _table 106 | ) 107 | ) 108 | 109 | if len(suggestions) > 1: 110 | view_defn["filters"] = [ 111 | { 112 | "name": "channel", 113 | "type": "string", 114 | "description": "Filter by the app's channel", 115 | "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}", 116 | "default_value": suggestions[0], 117 | "suggestions": suggestions, 118 | } 119 | ] 120 | 121 | view_defn["sql_table_name"] = f"`{table}`" 122 | 123 | return {"views": [view_defn] + nested_views} 124 | 125 | def get_dimensions( 126 | self, table, v1_name: Optional[str], dryrun 127 | ) -> List[Dict[str, Any]]: 128 | """Get the set of dimensions for this view.""" 129 | # add dimensions and dimension groups 130 | return lookml_utils._generate_dimensions(table, dryrun=dryrun) 131 | 132 | def get_measures( 133 | self, dimensions: List[dict], table: str, v1_name: Optional[str] 134 | ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 135 | """Generate measures from a list of dimensions. 136 | 137 | Adds a distinct client count when a client id dimension is present, and a ping count when a document_id dimension is present. 138 | 139 | Raise ClickException if multiple client id dimensions match. 140 | """ 141 | # Iterate through each of the dimensions and accumulate any measures 142 | # that we want to include in the view. We pull out the client id first 143 | # since we'll use it to calculate per-measure client counts. 144 | measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [] 145 | 146 | client_id_field = self.get_client_id(dimensions, table) 147 | if client_id_field is not None: 148 | measures.append( 149 | { 150 | "name": "clients", 151 | "type": "count_distinct", 152 | "sql": f"${{{client_id_field}}}", 153 | } 154 | ) 155 | 156 | for dimension in dimensions: 157 | dimension_name = dimension["name"] 158 | if dimension_name == "document_id": 159 | measures += [{"name": "ping_count", "type": "count"}] 160 | 161 | return measures 162 | -------------------------------------------------------------------------------- /generator/views/table_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Table View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from collections import defaultdict 6 | from itertools import filterfalse 7 | from typing import Any, Dict, Iterator, List, Optional, Set 8 | 9 | from click import ClickException 10 | 11 | from . 
import lookml_utils 12 | from .view import OMIT_VIEWS, View, ViewDict 13 | 14 | 15 | class TableView(View): 16 | """A view on any table.""" 17 | 18 | type: str = "table_view" 19 | measures: Optional[Dict[str, Dict[str, Any]]] 20 | 21 | def __init__( 22 | self, 23 | namespace: str, 24 | name: str, 25 | tables: List[Dict[str, str]], 26 | measures: Optional[Dict[str, Dict[str, Any]]] = None, 27 | ): 28 | """Create instance of a TableView.""" 29 | super().__init__(namespace, name, TableView.type, tables) 30 | self.measures = measures 31 | 32 | @classmethod 33 | def from_db_views( 34 | klass, 35 | namespace: str, 36 | is_glean: bool, 37 | channels: List[Dict[str, str]], 38 | db_views: dict, 39 | ) -> Iterator[TableView]: 40 | """Get Looker views for a namespace.""" 41 | view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 42 | for channel in channels: 43 | dataset = channel["dataset"] 44 | 45 | for view_id, references in db_views[dataset].items(): 46 | if view_id in OMIT_VIEWS: 47 | continue 48 | 49 | table_id = f"mozdata.{dataset}.{view_id}" 50 | table: Dict[str, str] = {"table": table_id} 51 | if "channel" in channel: 52 | table["channel"] = channel["channel"] 53 | 54 | view_tables[view_id][table_id] = table 55 | 56 | for view_id, tables_by_id in view_tables.items(): 57 | yield TableView(namespace, f"{view_id}_table", list(tables_by_id.values())) 58 | 59 | @classmethod 60 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> TableView: 61 | """Get a view from a name and dict definition.""" 62 | return TableView(namespace, name, _dict["tables"], _dict.get("measures")) 63 | 64 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 65 | """Generate LookML for this view.""" 66 | view_defn: Dict[str, Any] = {"name": self.name} 67 | 68 | # use schema for the table where channel=="release" or the first one 69 | table = next( 70 | (table for table in self.tables if table.get("channel") == "release"), 71 | self.tables[0], 72 | )["table"] 73 | 74 | # add dimensions and dimension groups 75 | dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) 76 | view_defn["dimensions"] = list( 77 | filterfalse(lookml_utils._is_dimension_group, dimensions) 78 | ) 79 | view_defn["dimension_groups"] = list( 80 | filter(lookml_utils._is_dimension_group, dimensions) 81 | ) 82 | 83 | # add tag "time_partitioning_field" 84 | time_partitioning_fields: Set[str] = set( 85 | # filter out falsy values 86 | filter( 87 | None, (table.get("time_partitioning_field") for table in self.tables) 88 | ) 89 | ) 90 | if len(time_partitioning_fields) > 1: 91 | raise ClickException(f"Multiple time_partitioning_fields for {self.name!r}") 92 | elif len(time_partitioning_fields) == 1: 93 | field_name = time_partitioning_fields.pop() 94 | sql = f"${{TABLE}}.{field_name}" 95 | for group_defn in view_defn["dimension_groups"]: 96 | if group_defn["sql"] == sql: 97 | if "tags" not in group_defn: 98 | group_defn["tags"] = [] 99 | group_defn["tags"].append("time_partitioning_field") 100 | break 101 | else: 102 | raise ClickException( 103 | f"time_partitioning_field {field_name!r} not found in {self.name!r}" 104 | ) 105 | 106 | [project, dataset, table_id] = table.split(".") 107 | table_schema = dryrun.create( 108 | project=project, 109 | dataset=dataset, 110 | table=table_id, 111 | ).get_table_schema() 112 | nested_views = lookml_utils._generate_nested_dimension_views( 113 | table_schema, self.name 114 | ) 115 | 116 | if self.measures: 117 | view_defn["measures"] = [ 118 | {"name": 
measure_name, **measure_parameters} 119 | for measure_name, measure_parameters in self.measures.items() 120 | ] 121 | 122 | # parameterize table name 123 | if len(self.tables) > 1: 124 | view_defn["parameters"] = [ 125 | { 126 | "name": "channel", 127 | "type": "unquoted", 128 | "default_value": table, 129 | "allowed_values": [ 130 | { 131 | "label": _table["channel"].title(), 132 | "value": _table["table"], 133 | } 134 | for _table in self.tables 135 | ], 136 | } 137 | ] 138 | view_defn["sql_table_name"] = "`{% parameter channel %}`" 139 | else: 140 | view_defn["sql_table_name"] = f"`{table}`" 141 | 142 | return {"views": [view_defn] + nested_views} 143 | -------------------------------------------------------------------------------- /generator/views/view.py: -------------------------------------------------------------------------------- 1 | """Generic class to describe Looker views.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, Iterator, List, Optional, Set, TypedDict 6 | 7 | from click import ClickException 8 | 9 | OMIT_VIEWS: Set[str] = set() 10 | 11 | 12 | # TODO: Once we upgrade to Python 3.11 mark just `measures` as non-required, not all keys. 13 | class ViewDict(TypedDict, total=False): 14 | """Represent a view definition.""" 15 | 16 | type: str 17 | tables: List[Dict[str, str]] 18 | measures: Dict[str, Dict[str, Any]] 19 | 20 | 21 | class View(object): 22 | """A generic Looker View.""" 23 | 24 | name: str 25 | view_type: str 26 | tables: List[Dict[str, Any]] 27 | namespace: str 28 | 29 | def __init__( 30 | self, 31 | namespace: str, 32 | name: str, 33 | view_type: str, 34 | tables: List[Dict[str, Any]], 35 | **kwargs, 36 | ): 37 | """Create an instance of a view.""" 38 | self.namespace = namespace 39 | self.tables = tables 40 | self.name = name 41 | self.view_type = view_type 42 | 43 | @classmethod 44 | def from_db_views( 45 | klass, 46 | namespace: str, 47 | is_glean: bool, 48 | channels: List[Dict[str, str]], 49 | db_views: dict, 50 | ) -> Iterator[View]: 51 | """Get Looker views from app.""" 52 | raise NotImplementedError("Only implemented in subclass.") 53 | 54 | @classmethod 55 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> View: 56 | """Get a view from a name and dict definition.""" 57 | raise NotImplementedError("Only implemented in subclass.") 58 | 59 | def get_type(self) -> str: 60 | """Get the type of this view.""" 61 | return self.view_type 62 | 63 | def as_dict(self) -> dict: 64 | """Get this view as a dictionary.""" 65 | return { 66 | "type": self.view_type, 67 | "tables": self.tables, 68 | } 69 | 70 | def __str__(self): 71 | """Stringify.""" 72 | return f"name: {self.name}, type: {self.type}, table: {self.tables}, namespace: {self.namespace}" 73 | 74 | def __eq__(self, other) -> bool: 75 | """Check for equality with other View.""" 76 | 77 | def comparable_dict(d): 78 | return {tuple(sorted([(k, str(v)) for k, v in t.items()])) for t in d} 79 | 80 | if isinstance(other, View): 81 | return ( 82 | self.name == other.name 83 | and self.view_type == other.view_type 84 | and comparable_dict(self.tables) == comparable_dict(other.tables) 85 | and self.namespace == other.namespace 86 | ) 87 | return False 88 | 89 | def get_dimensions( 90 | self, table, v1_name: Optional[str], dryrun 91 | ) -> List[Dict[str, Any]]: 92 | """Get the set of dimensions for this view.""" 93 | raise NotImplementedError("Only implemented in subclass.") 94 | 95 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 96 | 
""" 97 | Generate Lookml for this view. 98 | 99 | View instances can generate more than one Looker view, 100 | for e.g. nested fields and joins, so this returns 101 | a list. 102 | """ 103 | raise NotImplementedError("Only implemented in subclass.") 104 | 105 | def get_client_id(self, dimensions: List[dict], table: str) -> Optional[str]: 106 | """Return the first field that looks like a client identifier.""" 107 | client_id_fields = self.select_dimension( 108 | {"client_id", "client_info__client_id", "context_id"}, 109 | dimensions, 110 | table, 111 | ) 112 | # Some pings purposely disinclude client_ids, e.g. firefox installer 113 | return client_id_fields["name"] if client_id_fields else None 114 | 115 | def get_document_id(self, dimensions: List[dict], table: str) -> Optional[str]: 116 | """Return the first field that looks like a document_id.""" 117 | document_id = self.select_dimension("document_id", dimensions, table) 118 | return document_id["name"] if document_id else None 119 | 120 | def select_dimension( 121 | self, 122 | dimension_names: str | set[str], 123 | dimensions: List[dict], 124 | table: str, 125 | ) -> Optional[dict[str, str]]: 126 | """ 127 | Return the first field that matches dimension name. 128 | 129 | Throws if the query set is greater than one and more than one item is selected. 130 | """ 131 | if isinstance(dimension_names, str): 132 | dimension_names = {dimension_names} 133 | selected = [d for d in dimensions if d["name"] in dimension_names] 134 | if selected: 135 | # there should only be one dimension selected from the set 136 | # if there are multiple options in the dimention_names set. 137 | if len(dimension_names) > 1 and len(selected) > 1: 138 | raise ClickException( 139 | f"Duplicate {'/'.join(dimension_names)} dimension in {table!r}" 140 | ) 141 | return selected[0] 142 | return None 143 | -------------------------------------------------------------------------------- /namespaces-disallowlist.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - burnham 3 | - firefox_echo_show 4 | - firefox_fire_tv 5 | - firefox_reality 6 | - firefox_reality_pc 7 | - lockwise_android 8 | - lockwise_ios 9 | - mlhackweek_search 10 | - pine 11 | - pioneer_* 12 | - rally_* 13 | - reference_browser 14 | - tiktokreporter_* 15 | - org_mozilla_ios_tiktok_reporter* 16 | - org_mozilla_tiktokreporter 17 | - moso_* 18 | - regrets_reporter 19 | - mozphab 20 | - mozregression 21 | - pine 22 | - treeherder 23 | - sync 24 | - mozillavpn_backend_cirrus 25 | - glean_dictionary 26 | - mach 27 | - "*_cirrus" 28 | - bedrock 29 | - firefox_desktop_background_tasks 30 | - hubs 31 | - gleanjs_docs 32 | - fakespot 33 | - review_checker_desktop 34 | - review_checker 35 | - debug_ping_view 36 | - firefox_crashreporter 37 | - firefox_desktop_background_defaultagent 38 | - thunderbird_android 39 | - fenix: 40 | views: 41 | - installation 42 | - installation_table 43 | - topsites_impression 44 | - topsites_impression_table 45 | explores: 46 | - installation 47 | - topsites_impression 48 | - firefox_ios: 49 | views: 50 | - temp_bookmarks_sync 51 | - temp_bookmarks_sync_table 52 | - temp_clients_sync 53 | - temp_clients_sync_table 54 | - temp_credit_cards_sync 55 | - temp_credit_cards_sync_table 56 | - temp_history_sync 57 | - temp_history_sync_table 58 | - temp_logins_sync 59 | - temp_logins_sync_table 60 | - temp_rust_tabs_sync 61 | - temp_rust_tabs_sync_table 62 | - temp_sync 63 | - temp_sync_table 64 | - temp_tabs_sync 65 | - temp_tabs_sync_table 66 | - 
topsites_impression 67 | - topsites_impression_table 68 | explores: 69 | - temp_bookmarks_sync 70 | - temp_clients_sync 71 | - temp_credit_cards_sync 72 | - temp_history_sync 73 | - temp_logins_sync 74 | - temp_rust_tabs_sync 75 | - temp_sync 76 | - temp_tabs_sync 77 | - topsites_impression 78 | - "*": # exclude these pings/views/explores from all namespaces 79 | views: 80 | - addresses_sync 81 | - addresses_sync_table 82 | - adjust_attribution 83 | - adjust_attribution_table 84 | - activation 85 | - activation_table 86 | - baseline_clients_daily 87 | - baseline_clients_first_seen 88 | - baseline_clients_first_seen_table 89 | - bounce_tracking_protection 90 | - bounce_tracking_protection_table 91 | - captcha_detection 92 | - captcha_detection_table 93 | - client_deduplication 94 | - client_deduplication_table 95 | - context_id_deletion_request 96 | - context_id_deletion_request_table 97 | - cookie_banner_report_site 98 | - cookie_banner_report_site_table 99 | - dau_reporting 100 | - dau_reporting_table 101 | - event_names 102 | - feature_usage 103 | - feature_usage_table 104 | - fog_validation 105 | - fog_validation_table 106 | - font_list 107 | - font_list_table 108 | - hang_report 109 | - hang_report_table 110 | - heartbeat 111 | - heartbeat_table 112 | - home 113 | - home_table 114 | - logins_sync 115 | - logins_sync_table 116 | - new_metric_capture_emulation 117 | - new_metric_capture_emulation_table 118 | - nimbus 119 | - nimbus_table 120 | - pageload 121 | - pageload_table 122 | - pocket_button 123 | - pocket_button_table 124 | - startup_timeline 125 | - startup_timeline_table 126 | - tabs_sync 127 | - tabs_sync_table 128 | - usage_deletion_request 129 | - usage_deletion_request_table 130 | - usage_reporting_clients_* 131 | explores: 132 | - addresses_sync 133 | - activation 134 | - adjust_attribution 135 | - baseline_clients_daily 136 | - baseline_clients_first_seen 137 | - baseline_clients_last_seen 138 | - bounce_tracking_protection 139 | - captcha_detection 140 | - client_deduplication 141 | - context_id_deletion_request 142 | - cookie_banner_report_site 143 | - dau_reporting 144 | - event_names 145 | - feature_usage 146 | - fog_validation 147 | - font_list 148 | - hang_report 149 | - heartbeat 150 | - home 151 | - logins_sync 152 | - new_metric_capture_emulation 153 | - nimbus 154 | - pageload 155 | - pocket_button 156 | - startup_timeline 157 | - tabs_sync 158 | - usage_deletion_request 159 | - usage_reporting_clients_* 160 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --black 4 | --isort 5 | --mypy-ignore-missing-imports 6 | --pydocstyle 7 | --strict-markers 8 | filterwarnings = 9 | # upstream lib imports ABC improperly for backward compatibility 10 | ignore::DeprecationWarning:google.protobuf.descriptor 11 | ignore::DeprecationWarning:google.protobuf.internal.well_known_types 12 | # Silence: "Your application has authenticated using end user credentials from Google Cloud SDK" 13 | ignore::UserWarning:google.auth 14 | markers = 15 | integration: mark tests that check integration with external services. Skipped when not specifically enabled. 
16 | norecursedirs = 17 | venv 18 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | click==8.2.1 2 | flake8==7.1.1 3 | exceptiongroup # for pytest on python<=3.10 4 | google-cloud-bigquery==3.34.0 5 | google-cloud-storage==3.2.0 6 | Jinja2==3.1.6 7 | lkml==1.3.7 8 | looker-sdk==25.10.0 9 | mozilla-metric-config-parser==2025.7.1 10 | mozilla-nimbus-schemas==3001.0.0 11 | mozilla-schema-generator==0.5.1 12 | pandas==2.3.1 13 | pip-tools==7.4.1 14 | pre-commit==4.2.0 15 | pyarrow==20.0.0 16 | pytest-black==0.6.0 17 | pytest-isort==4.0.0 18 | pytest-mypy==1.0.1 19 | pytest-pydocstyle==2.4.0 20 | pytest==7.4.4 21 | PyYAML==6.0.2 22 | tomli==2.2.1 # for toml parsing on python<3.11 23 | types-PyYaml==6.0.12.20250516 24 | yamllint==1.37.1 25 | gitpython==3.1.44 26 | spectacles==2.4.12 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Installation for lookml-generator.""" 2 | 3 | # -*- coding: utf-8 -*- 4 | 5 | # This Source Code Form is subject to the terms of the Mozilla Public 6 | # License, v. 2.0. If a copy of the MPL was not distributed with this 7 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 | 9 | from setuptools import find_packages, setup 10 | 11 | readme = open("README.md").read() 12 | 13 | setup( 14 | name="lookml-generator", 15 | python_requires=">=3.10.0", 16 | version="0.0.0", 17 | description="Generates LookML to represent Mozilla data.", 18 | long_description=readme, 19 | long_description_content_type="text/markdown", 20 | author="Frank Bertsch", 21 | author_email="frank@mozilla.com", 22 | url="https://github.com/mozilla/lookml-generator", 23 | packages=find_packages(include=["generator", "generator.*"]), 24 | package_dir={"lookml-generator": "generator"}, 25 | entry_points={ 26 | "console_scripts": [ 27 | "lookml-generator=generator.__main__:main", 28 | ] 29 | }, 30 | include_package_data=True, 31 | package_data={"generator": ["*/templates/*.lkml"]}, 32 | zip_safe=False, 33 | keywords="lookml-generator", 34 | classifiers=[ 35 | "Intended Audience :: Developers", 36 | "Programming Language :: Python :: 3", 37 | "Programming Language :: Python :: 3.10", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests.""" 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """PyTest configuration.""" 2 | 3 | import json 4 | 5 | import pytest 6 | 7 | 8 | def pytest_collection_modifyitems(config, items): 9 | """Skip integration tests unless a keyword or marker filter is specified.""" 10 | keywordexpr = config.option.keyword 11 | markexpr = config.option.markexpr 12 | if keywordexpr or markexpr: 13 | return 14 | 15 | skip_integration = pytest.mark.skip(reason="integration marker not selected") 16 | 17 | for item in items: 18 | if "integration" in item.keywords: 19 | item.add_marker(skip_integration) 20 | 21 | 22 | @pytest.fixture 23 | def app_listings_uri(tmp_path): 24 | """ 25 | Mock app listings. 
26 | 27 | See: https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings 28 | """ 29 | dest = tmp_path / "app-listings" 30 | dest.write_bytes( 31 | json.dumps( 32 | [ 33 | { 34 | "app_name": "glean-app", 35 | "app_channel": "release", 36 | "canonical_app_name": "Glean App", 37 | "bq_dataset_family": "glean_app_release", 38 | "notification_emails": ["glean-app-owner@allizom.com"], 39 | "v1_name": "glean-app-release", 40 | }, 41 | { 42 | "app_name": "glean-app", 43 | "app_channel": "beta", 44 | "canonical_app_name": "Glean App Beta", 45 | "bq_dataset_family": "glean_app_beta", 46 | "notification_emails": ["glean-app-owner-beta@allizom.com"], 47 | "v1_name": "glean-app-beta", 48 | }, 49 | ] 50 | ).encode() 51 | ) 52 | return dest.absolute().as_uri() 53 | 54 | 55 | @pytest.fixture 56 | def metrics_listings_file(tmp_path): 57 | """Mock metrics listings.""" 58 | dest = tmp_path / "metrics-listings" 59 | dest.write_bytes( 60 | json.dumps( 61 | { 62 | "test.counter": { 63 | "type": "counter", 64 | }, 65 | "glean_validation_metrics.ping_count": { 66 | "type": "counter", 67 | }, 68 | } 69 | ).encode() 70 | ) 71 | return dest.absolute() 72 | 73 | 74 | @pytest.fixture 75 | def glean_apps(): 76 | """Mock processed version of app listings (see above).""" 77 | return [ 78 | { 79 | "name": "glean-app", 80 | "glean_app": True, 81 | "pretty_name": "Glean App", 82 | "owners": [ 83 | "glean-app-owner@allizom.com", 84 | ], 85 | "channels": [ 86 | { 87 | "channel": "release", 88 | "dataset": "glean_app", 89 | "source_dataset": "glean_app_release", 90 | }, 91 | { 92 | "channel": "beta", 93 | "dataset": "glean_app_beta", 94 | "source_dataset": "glean_app_beta_stable", 95 | }, 96 | ], 97 | "v1_name": "glean-app-release", 98 | } 99 | ] 100 | -------------------------------------------------------------------------------- /tests/data/metric-hub/definitions/fenix.toml: -------------------------------------------------------------------------------- 1 | [metrics] 2 | 3 | [metrics.uri_count] 4 | data_source = "baseline" 5 | select_expression = '{{agg_sum("metrics.counter.events_total_uri_count")}}' 6 | friendly_name = "URIs visited" 7 | description = "Counts the number of URIs each client visited" 8 | 9 | [metrics.active_hours] 10 | select_expression = "COALESCE(SUM(metrics.timespan.glean_baseline_duration.value), 0) / 3600.0" 11 | data_source = "baseline" 12 | friendly_name = "Active Hours" 13 | description = "Total time Firefox was active" 14 | 15 | 16 | [metrics.performance_pageload_dcl] 17 | data_source = "metrics" 18 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_dcl IGNORE NULLS)" 19 | friendly_name = "Pageload DCL" 20 | description = "Time in milliseconds from navigationStart to domContentLoaded for the foreground http or https root content document." 21 | category = "performance" 22 | type = "histogram" 23 | 24 | [metrics.performance_pageload_dcl_responsestart] 25 | data_source = "metrics" 26 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_dcl_responsestart IGNORE NULLS)" 27 | friendly_name = "Pageload DCL Response Start" 28 | description = "Time in milliseconds from responseStart to domContentLoaded for the foreground http or https root content document." 
29 | category = "performance" 30 | type = "histogram" 31 | 32 | [metrics.performance_pageload_fcp] 33 | data_source = "metrics" 34 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_fcp IGNORE NULLS)" 35 | friendly_name = "Pageload FCP" 36 | description = "The time between navigationStart and the first contentful paint of a foreground http or https root content document, in milliseconds. The contentful paint timestamp is taken during display list building and does not include rasterization or compositing of that paint." 37 | category = "performance" 38 | type = "histogram" 39 | 40 | 41 | [data_sources.baseline] 42 | from_expression = """( 43 | SELECT 44 | p.*, 45 | DATE(p.submission_timestamp) AS submission_date 46 | FROM `moz-fx-data-shared-prod.{dataset}.baseline` p 47 | )""" 48 | client_id_column = "client_info.client_id" 49 | experiments_column_type = "glean" 50 | default_dataset = "org_mozilla_firefox" 51 | friendly_name = "Baseline" 52 | description = "Baseline Ping" 53 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 54 | 55 | [data_sources.baseline_v2] 56 | from_expression = """( 57 | SELECT 58 | p.*, 59 | DATE(p.submission_timestamp) AS submission_date 60 | FROM `moz-fx-data-shared-prod.{dataset}.baseline` p 61 | )""" 62 | client_id_column = "client_info.client_id" 63 | submission_date_column = "DATE(submission_timestamp)" 64 | experiments_column_type = "glean" 65 | default_dataset = "fenix" 66 | friendly_name = "Baseline" 67 | description = "Baseline Ping" 68 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 69 | 70 | [data_sources.events] 71 | from_expression = """( 72 | SELECT 73 | p.* EXCEPT (events), 74 | DATE(p.submission_timestamp) AS submission_date, 75 | event 76 | FROM 77 | `moz-fx-data-shared-prod.{dataset}.events` p 78 | CROSS JOIN 79 | UNNEST(p.events) AS event 80 | )""" 81 | client_id_column = "client_info.client_id" 82 | experiments_column_type = "glean" 83 | default_dataset = "org_mozilla_firefox" 84 | friendly_name = "Events" 85 | description = "Events Ping" 86 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 87 | 88 | [data_sources.metrics] 89 | from_expression = """( 90 | SELECT 91 | p.*, 92 | DATE(p.submission_timestamp) AS submission_date 93 | FROM `moz-fx-data-shared-prod.{dataset}.metrics` p 94 | )""" 95 | client_id_column = "client_info.client_id" 96 | experiments_column_type = "glean" 97 | default_dataset = "org_mozilla_firefox" 98 | friendly_name = "Metrics" 99 | description = "Metrics Ping" 100 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 101 | -------------------------------------------------------------------------------- /tests/test_events.py: -------------------------------------------------------------------------------- 1 | import lkml 2 | import pytest 3 | 4 | from generator.explores import EventsExplore 5 | from generator.views import EventsView 6 | 7 | from .utils import MockDryRun, MockDryRunContext, print_and_test 8 | 9 | 10 | class MockDryRunEvents(MockDryRun): 11 | """Mock dryrun.DryRun.""" 12 | 13 | def get_table_schema(self): 14 | """Mock dryrun.DryRun.get_table_schema""" 15 | 16 | return [ 17 | { 18 | "name": "client_info", 19 | "type": "RECORD", 20 | "fields": [{"name": "client_id", "type": "STRING"}], 21 | }, 22 | 
{"name": "event_id", "type": "STRING"}, 23 | ] 24 | 25 | 26 | @pytest.fixture() 27 | def events_view(): 28 | return EventsView( 29 | "glean_app", 30 | "events", 31 | [ 32 | { 33 | "events_table_view": "events_unnested_table", 34 | "base_table": "mozdata.glean_app.events_unnested", 35 | }, 36 | ], 37 | ) 38 | 39 | 40 | @pytest.fixture(params=["submission", "timestamp"]) 41 | def time_partitioning_group(request): 42 | return request.param 43 | 44 | 45 | @pytest.fixture() 46 | def events_explore(events_view, tmp_path, time_partitioning_group): 47 | (tmp_path / "events_unnested_table.view.lkml").write_text( 48 | lkml.dump( 49 | { 50 | "views": [ 51 | { 52 | "name": "events_unnested_table", 53 | "dimensions": [ 54 | { 55 | "name": "client_info__client_count", 56 | "type": "string", 57 | }, 58 | ], 59 | "dimension_groups": [ 60 | { 61 | "name": time_partitioning_group, 62 | "tags": ( 63 | ["time_partitioning_field"] 64 | if time_partitioning_group != "submission" 65 | else [] 66 | ), 67 | "type": "time", 68 | "timeframes": [ 69 | "raw", 70 | "time", 71 | "date", 72 | ], 73 | } 74 | ], 75 | } 76 | ] 77 | } 78 | ) 79 | ) 80 | (tmp_path / "events.view.lkml").write_text( 81 | lkml.dump( 82 | { 83 | "views": [ 84 | { 85 | "name": "events", 86 | "measures": [ 87 | { 88 | "name": "event_count", 89 | "type": "count", 90 | } 91 | ], 92 | } 93 | ] 94 | } 95 | ) 96 | ) 97 | return EventsExplore( 98 | "events", 99 | {"base_view": "events", "extended_view": "events_unnested_table"}, 100 | tmp_path, 101 | ) 102 | 103 | 104 | def test_view_from_db_views(events_view): 105 | db_views = { 106 | "glean_app": { 107 | "events": [["mozdata", "glean_app", "events"]], 108 | "events_unnested": [["mozdata", "glean_app", "events_unnested"]], 109 | } 110 | } 111 | 112 | channels = [ 113 | {"channel": "release", "dataset": "glean_app"}, 114 | {"channel": "beta", "dataset": "glean_app_beta"}, 115 | ] 116 | 117 | actual = next(EventsView.from_db_views("glean_app", True, channels, db_views)) 118 | 119 | assert actual == events_view 120 | 121 | 122 | def test_view_from_dict(events_view): 123 | actual = EventsView.from_dict( 124 | "glean_app", 125 | "events", 126 | { 127 | "type": "events_view", 128 | "tables": [ 129 | { 130 | "events_table_view": "events_unnested_table", 131 | "base_table": "mozdata.glean_app.events_unnested", 132 | } 133 | ], 134 | }, 135 | ) 136 | 137 | assert actual == events_view 138 | 139 | 140 | def test_explore_from_views(events_view, events_explore): 141 | views = [events_view] 142 | actual = next(EventsExplore.from_views(views)) 143 | 144 | assert actual == events_explore 145 | 146 | 147 | def test_explore_from_dict(events_explore, tmp_path): 148 | actual = EventsExplore.from_dict( 149 | "events", 150 | {"views": {"base_view": "events", "extended_view": "events_unnested_table"}}, 151 | tmp_path, 152 | ) 153 | assert actual == events_explore 154 | 155 | 156 | def test_view_lookml(events_view): 157 | expected = { 158 | "includes": ["events_unnested_table.view.lkml"], 159 | "views": [ 160 | { 161 | "name": "events", 162 | "extends": ["events_unnested_table"], 163 | "measures": [ 164 | { 165 | "name": "event_count", 166 | "description": ("The number of times the event(s) occurred."), 167 | "type": "count", 168 | }, 169 | { 170 | "name": "client_count", 171 | "description": ( 172 | "The number of clients that completed the event(s)." 
173 |                         ),
174 |                         "type": "count_distinct",
175 |                         "sql": "${client_info__client_id}",
176 |                     },
177 |                 ],
178 |                 "dimensions": [
179 |                     {
180 |                         "name": "event_id",
181 |                         "primary_key": "yes",
182 |                     },
183 |                 ],
184 |             },
185 |         ],
186 |     }
187 | 
188 |     mock_dryrun = MockDryRunContext(MockDryRunEvents, False)
189 | 
190 |     actual = events_view.to_lookml(None, dryrun=mock_dryrun)
191 |     print_and_test(expected=expected, actual=actual)
192 | 
193 | 
194 | def test_explore_lookml(time_partitioning_group, events_explore):
195 |     date_dimension = f"{time_partitioning_group}_date"
196 |     expected = [
197 |         {
198 |             "name": "event_counts",
199 |             "view_name": "events",
200 |             "description": "Event counts over time.",
201 |             "always_filter": {
202 |                 "filters": [
203 |                     {date_dimension: "28 days"},
204 |                 ]
205 |             },
206 |             "sql_always_where": f"${{events.{date_dimension}}} >= '2010-01-01'",
207 |             "queries": [
208 |                 {
209 |                     "description": "Event counts from all events over the past two weeks.",
210 |                     "dimensions": [date_dimension],
211 |                     "measures": ["event_count"],
212 |                     "filters": [
213 |                         {date_dimension: "14 days"},
214 |                     ],
215 |                     "name": "all_event_counts",
216 |                 },
217 |             ],
218 |             "joins": [],
219 |         },
220 |     ]
221 | 
222 |     actual = events_explore.to_lookml(None, None)
223 |     print_and_test(expected=expected, actual=actual)
224 | 
--------------------------------------------------------------------------------
/tests/test_glean_ping_view.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import Mock, patch
  2 | 
  3 | from mozilla_schema_generator.probes import GleanProbe
  4 | 
  5 | from generator.views import GleanPingView
  6 | 
  7 | from .utils import MockDryRun, MockDryRunContext
  8 | 
  9 | 
 10 | class MockDryRunPingView(MockDryRun):
 11 |     """Mock dryrun.DryRun."""
 12 | 
 13 |     def get_table_schema(self):
 14 |         """Mock dryrun.DryRun.get_table_schema"""
 15 |         table_id = f"{self.project}.{self.dataset}.{self.table}"
 16 | 
 17 |         if table_id == "mozdata.glean_app.dash_name":
 18 |             return [
 19 |                 {
 20 |                     "name": "metrics",
 21 |                     "type": "RECORD",
 22 |                     "fields": [
 23 |                         {
 24 |                             "name": "string",
 25 |                             "type": "RECORD",
 26 |                             "fields": [{"name": "fun_string_metric", "type": "STRING"}],
 27 |                         },
 28 |                         {
 29 |                             "name": "url2",
 30 |                             "type": "RECORD",
 31 |                             "fields": [{"name": "fun_url_metric", "type": "STRING"}],
 32 |                         },
 33 |                         {
 34 |                             "name": "datetime",
 35 |                             "type": "RECORD",
 36 |                             "fields": [
 37 |                                 {"name": "fun_datetime_metric", "type": "TIMESTAMP"}
 38 |                             ],
 39 |                         },
 40 |                         {
 41 |                             "name": "labeled_counter",
 42 |                             "type": "RECORD",
 43 |                             "fields": [
 44 |                                 {
 45 |                                     "name": "fun_counter_metric",
 46 |                                     "type": "STRING",
 47 |                                     "mode": "REPEATED",
 48 |                                     "fields": [
 49 |                                         {"name": "key", "type": "STRING"},
 50 |                                         {"name": "value", "type": "INT64"},
 51 |                                     ],
 52 |                                 }
 53 |                             ],
 54 |                         },
 55 |                     ],
 56 |                 }
 57 |             ]
 58 | 
 59 |         raise ValueError(f"Table not found: {table_id}")
 60 | 
 61 | 
 62 | @patch("generator.views.glean_ping_view.GleanPing")
 63 | def test_kebab_case(mock_glean_ping):
 64 |     """
 65 |     Tests that we handle metrics from kebab-case pings
 66 |     """
 67 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
 68 |     glean_app = Mock()
 69 |     glean_app.get_probes.return_value = [
 70 |         GleanProbe(
 71 |             "fun.string_metric",
 72 |             {
 73 |                 "type": "string",
 74 |                 "history": [
 75 |                     {
 76 |                         "send_in_pings": ["dash-name"],
 77 |                         "dates": {
 78 |                             "first": "2020-01-01 00:00:00",
 79 |                             "last": "2020-01-02 00:00:00",
 80 |                         },
 81 |                     }
 82 |                 ],
 83 |                 "name": "string_metric",
 84 |             },
 85 |         ),
 86 |     ]
 87 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
 88 |     mock_glean_ping.return_value = glean_app
 89 |     view = GleanPingView(
 90 |         "glean_app",
 91 |         "dash_name",
 92 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
 93 |     )
 94 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
 95 |     assert len(lookml["views"]) == 1
 96 |     assert len(lookml["views"][0]["dimensions"]) == 1
 97 |     assert (
 98 |         lookml["views"][0]["dimensions"][0]["name"]
 99 |         == "metrics__string__fun_string_metric"
100 |     )
101 | 
102 | 
103 | @patch("generator.views.glean_ping_view.GleanPing")
104 | def test_url_metric(mock_glean_ping):
105 |     """
106 |     Tests that we handle URL metrics
107 |     """
108 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
109 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
110 |     glean_app = Mock()
111 |     glean_app.get_probes.return_value = [
112 |         GleanProbe(
113 |             "fun.url_metric",
114 |             {
115 |                 "type": "url",
116 |                 "history": [
117 |                     {
118 |                         "send_in_pings": ["dash-name"],
119 |                         "dates": {
120 |                             "first": "2020-01-01 00:00:00",
121 |                             "last": "2020-01-02 00:00:00",
122 |                         },
123 |                     }
124 |                 ],
125 |                 "name": "url_metric",
126 |             },
127 |         ),
128 |     ]
129 |     mock_glean_ping.return_value = glean_app
130 |     view = GleanPingView(
131 |         "glean_app",
132 |         "dash_name",
133 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
134 |     )
135 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
136 |     assert len(lookml["views"]) == 1
137 |     assert len(lookml["views"][0]["dimensions"]) == 1
138 |     assert (
139 |         lookml["views"][0]["dimensions"][0]["name"] == "metrics__url2__fun_url_metric"
140 |     )
141 | 
142 | 
143 | @patch("generator.views.glean_ping_view.GleanPing")
144 | def test_datetime_metric(mock_glean_ping):
145 |     """
146 |     Tests that we handle datetime metrics
147 |     """
148 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
149 |     glean_app = Mock()
150 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
151 |     glean_app.get_probes.return_value = [
152 |         GleanProbe(
153 |             "fun.datetime_metric",
154 |             {
155 |                 "type": "datetime",
156 |                 "history": [
157 |                     {
158 |                         "send_in_pings": ["dash-name"],
159 |                         "dates": {
160 |                             "first": "2020-01-01 00:00:00",
161 |                             "last": "2020-01-02 00:00:00",
162 |                         },
163 |                     }
164 |                 ],
165 |                 "name": "datetime_metric",
166 |             },
167 |         ),
168 |     ]
169 |     mock_glean_ping.return_value = glean_app
170 |     view = GleanPingView(
171 |         "glean_app",
172 |         "dash_name",
173 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
174 |     )
175 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
176 |     assert len(lookml["views"]) == 1
177 |     assert len(lookml["views"][0]["dimension_groups"]) == 1
178 |     assert (
179 |         lookml["views"][0]["dimension_groups"][0]["name"]
180 |         == "metrics__datetime__fun_datetime_metric"
181 |     )
182 |     assert "timeframes" in lookml["views"][0]["dimension_groups"][0]
183 |     assert "group_label" not in lookml["views"][0]["dimension_groups"][0]
184 |     assert "group_item_label" not in lookml["views"][0]["dimension_groups"][0]
185 |     assert "links" not in lookml["views"][0]["dimension_groups"][0]
186 | 
187 | 
188 | @patch("generator.views.glean_ping_view.GleanPing")
189 | def test_undeployed_probe(mock_glean_ping):
190 |     """
191 |     Tests that we handle metrics not yet deployed to bigquery
192 |     """
193 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
194 |     glean_app = Mock()
195 |     glean_app.get_probes.return_value = [
196 |         GleanProbe(
197 |             f"fun.{name}",
198 |             {
199 |                 "type": "labeled_counter",
200 |                 "history": [
201 |                     {
"send_in_pings": ["dash-name"], 203 | "dates": { 204 | "first": "2020-01-01 00:00:00", 205 | "last": "2020-01-02 00:00:00", 206 | }, 207 | } 208 | ], 209 | "name": name, 210 | }, 211 | ) 212 | # "counter_metric2" represents a probe not present in the table schema 213 | for name in ["counter_metric", "counter_metric2"] 214 | ] 215 | mock_glean_ping.return_value = glean_app 216 | mock_dryrun = MockDryRunContext(MockDryRunPingView, False) 217 | view = GleanPingView( 218 | "glean_app", 219 | "dash_name", 220 | [{"channel": "release", "table": "mozdata.glean_app.dash_name"}], 221 | ) 222 | lookml = view.to_lookml("glean-app", dryrun=mock_dryrun) 223 | # In addition to the table view, each labeled counter adds a join view and a suggest 224 | # view. Expect 3 views, because 1 for the table view, 2 added for fun.counter_metric 225 | # because it's in the table schema, and 0 added for fun.counter_metric2 because it's 226 | # not in the table schema. 227 | assert len(lookml["views"]) == 2 228 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from google.cloud import bigquery 3 | 4 | 5 | @pytest.fixture 6 | def client(): 7 | return bigquery.Client() 8 | 9 | 10 | @pytest.mark.integration 11 | def test_google_connection(client): 12 | job = client.query("SELECT NULL") 13 | assert [(None,)] == [tuple(row) for row in job.result()] 14 | -------------------------------------------------------------------------------- /tests/test_lookml_utils.py: -------------------------------------------------------------------------------- 1 | from generator.views.lookml_utils import escape_filter_expr 2 | 3 | 4 | def test_escape_char(): 5 | expr = "a_b" 6 | assert escape_filter_expr(expr) == "a^_b" 7 | 8 | 9 | def test_escape_multi_char(): 10 | expr = 'a_b%c,d"f^g' 11 | assert escape_filter_expr(expr) == 'a^_b^%c^,d^"f^^g' 12 | 13 | 14 | def test_escape_leading_char(): 15 | expr = "-a-b" 16 | assert escape_filter_expr(expr) == "^-a-b" 17 | -------------------------------------------------------------------------------- /tests/test_spoke.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import Mock, patch 3 | 4 | import lkml 5 | import looker_sdk as _looker_sdk 6 | import pytest 7 | 8 | from generator.spoke import generate_directories 9 | 10 | from .utils import print_and_test 11 | 12 | 13 | @pytest.fixture() 14 | def namespaces() -> dict: 15 | return { 16 | "glean-app": { 17 | "pretty_name": "Glean App", 18 | "glean_app": True, 19 | "spoke": "looker-spoke-default", 20 | "views": { 21 | "baseline": { 22 | "type": "ping_view", 23 | "tables": [ 24 | { 25 | "channel": "release", 26 | "table": "mozdata.glean_app.baseline", 27 | } 28 | ], 29 | } 30 | }, 31 | "explores": { 32 | "baseline": {"type": "ping_explore", "views": {"base_view": "baseline"}} 33 | }, 34 | } 35 | } 36 | 37 | 38 | @pytest.fixture 39 | def custom_namespaces(): 40 | return { 41 | "custom": { 42 | "glean_app": False, 43 | "spoke": "looker-spoke-private", 44 | "connection": "bigquery-oauth", 45 | "owners": ["custom-owner@allizom.com", "custom-owner2@allizom.com"], 46 | "pretty_name": "Custom", 47 | "views": { 48 | "baseline": { 49 | "tables": [ 50 | {"channel": "release", "table": "mozdata.custom.baseline"} 51 | ], 52 | "type": "ping_view", 53 | } 54 | }, 55 | } 56 | } 57 | 58 | 59 | @patch("generator.spoke.looker_sdk") 60 | 
 60 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
 61 | def test_generate_directories(looker_sdk, namespaces, tmp_path):
 62 |     sdk = looker_sdk.init40()
 63 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
 64 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
 65 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
 66 | 
 67 |     generate_directories(namespaces, tmp_path, True)
 68 |     dirs = list((tmp_path / "looker-spoke-default").iterdir())
 69 |     assert dirs == [tmp_path / "looker-spoke-default" / "glean-app"]
 70 | 
 71 |     app_path = tmp_path / "looker-spoke-default" / "glean-app/"
 72 |     sub_dirs = set(app_path.iterdir())
 73 |     assert sub_dirs == {
 74 |         app_path / "views",
 75 |         app_path / "explores",
 76 |         app_path / "dashboards",
 77 |         app_path / "glean-app.model.lkml",
 78 |     }
 79 | 
 80 |     sdk.create_lookml_model.assert_called_once()
 81 |     sdk.update_model_set.assert_called_once()
 82 | 
 83 | 
 84 | @patch("generator.spoke.looker_sdk")
 85 | def test_generate_directories_no_sdk(looker_sdk, namespaces, tmp_path):
 86 |     sdk = looker_sdk.init40()
 87 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
 88 | 
 89 |     generate_directories(namespaces, tmp_path, False)
 90 |     dirs = list((tmp_path / "looker-spoke-default").iterdir())
 91 |     assert dirs == [tmp_path / "looker-spoke-default" / "glean-app"]
 92 | 
 93 |     app_path = tmp_path / "looker-spoke-default" / "glean-app"
 94 |     sub_dirs = set(app_path.iterdir())
 95 |     assert sub_dirs == {
 96 |         app_path / "views",
 97 |         app_path / "explores",
 98 |         app_path / "dashboards",
 99 |         app_path / "glean-app.model.lkml",
100 |     }
101 | 
102 |     assert (app_path / "dashboards" / ".gitkeep").exists()
103 | 
104 |     sdk.create_lookml_model.assert_not_called()
105 | 
106 | 
107 | @patch("generator.spoke.looker_sdk")
108 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
109 | def test_existing_dir(looker_sdk, namespaces, tmp_path):
110 |     sdk = looker_sdk.init40()
111 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
112 | 
113 |     generate_directories(namespaces, tmp_path, True)
114 |     tmp_file = tmp_path / "looker-spoke-default" / "glean-app" / "tmp-file"
115 |     tmp_file.write_text("hello, world")
116 | 
117 |     generate_directories(namespaces, tmp_path)
118 | 
119 |     # We shouldn't overwrite this dir
120 |     assert tmp_file.is_file()
121 | 
122 | 
123 | @patch("generator.spoke.looker_sdk")
124 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilla.cloud.looker.com"})
125 | def test_generate_model(looker_sdk, namespaces, tmp_path):
126 |     sdk = looker_sdk.init40()
127 |     sdk.search_model_sets.side_effect = [[Mock(models=["model"], id=1)]]
128 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
129 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
130 | 
131 |     write_model = Mock()
132 |     looker_sdk.models40.WriteModelSet.return_value = write_model
133 | 
134 |     generate_directories(namespaces, tmp_path, True)
135 |     expected_dict = {
136 |         "connection": "telemetry",
137 |         "label": "Glean App",
138 |         "includes": ["//looker-hub/glean-app/explores/*"],
139 |     }
140 | 
141 |     expected_text = """connection: "telemetry"
142 | label: "Glean App"
143 | # Include files from looker-hub or spoke-default below. For example:
144 | include: "//looker-hub/glean-app/explores/*"
145 | # include: "//looker-hub/glean-app/dashboards/*"
146 | # include: "views/*"
147 | # include: "explores/*"
148 | # include: "dashboards/*"
149 | """
150 |     actual_text = (
151 |         tmp_path / "looker-spoke-default" / "glean-app" / "glean-app.model.lkml"
152 |     ).read_text()
153 |     actual_dict = lkml.load(actual_text)
154 |     assert expected_text == actual_text
155 |     assert expected_dict == actual_dict
156 | 
157 |     looker_sdk.models40.WriteModelSet.assert_any_call(models=["model", "glean-app"])
158 |     assert looker_sdk.models40.WriteModelSet.call_count == 1
159 | 
160 |     sdk.update_model_set.assert_any_call(1, write_model)
161 | 
162 | 
163 | @patch("generator.spoke.looker_sdk")
164 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
165 | def test_alternate_connection(looker_sdk, custom_namespaces, tmp_path):
166 |     sdk = looker_sdk.init40()
167 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
168 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
169 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
170 | 
171 |     write_model = Mock()
172 |     looker_sdk.models40.WriteLookmlModel.return_value = write_model
173 | 
174 |     generate_directories(custom_namespaces, tmp_path, True)
175 |     dirs = list((tmp_path / "looker-spoke-private").iterdir())
176 |     assert dirs == [tmp_path / "looker-spoke-private" / "custom"]
177 | 
178 |     app_path = tmp_path / "looker-spoke-private" / "custom"
179 |     sub_dirs = set(app_path.iterdir())
180 |     assert sub_dirs == {
181 |         app_path / "views",
182 |         app_path / "explores",
183 |         app_path / "dashboards",
184 |         app_path / "custom.model.lkml",
185 |     }
186 | 
187 |     expected_dict = {
188 |         "connection": "bigquery-oauth",
189 |         "label": "Custom",
190 |     }
191 |     expected_text = """connection: "bigquery-oauth"
192 | label: "Custom"
193 | # Include files from looker-hub or spoke-default below. For example:
194 | # include: "//looker-hub/custom/explores/*"
195 | # include: "//looker-hub/custom/dashboards/*"
196 | # include: "views/*"
197 | # include: "explores/*"
198 | # include: "dashboards/*"
199 | """
200 |     actual_text = (
201 |         tmp_path / "looker-spoke-private" / "custom" / "custom.model.lkml"
202 |     ).read_text()
203 |     actual_dict = lkml.load(actual_text)
204 |     print_and_test(expected_text, actual_text)
205 |     print_and_test(expected_dict, actual_dict)
206 | 
207 |     looker_sdk.models40.WriteLookmlModel.assert_called_with(
208 |         allowed_db_connection_names=["bigquery-oauth"],
209 |         name="custom",
210 |         project_name="spoke-private",
211 |     )
212 |     sdk.create_lookml_model.assert_called_with(write_model)
213 |     sdk.update_model_set.assert_called_once()
214 | 
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
  1 | """Utility functions for tests."""
  2 | 
  3 | import pprint
  4 | 
  5 | 
  6 | def get_differences(expected, result, path="", sep="."):
  7 |     """
  8 |     Get the differences between two JSON-like python objects.
  9 | 
 10 |     For complicated objects, this is a big improvement over pytest -vv.
11 | """ 12 | differences = [] 13 | 14 | if expected is not None and result is None: 15 | differences.append(("Expected exists but not Result", path)) 16 | if expected is None and result is not None: 17 | differences.append(("Result exists but not Expected", path)) 18 | if expected is None and result is None: 19 | return differences 20 | 21 | exp_is_dict, res_is_dict = isinstance(expected, dict), isinstance(result, dict) 22 | exp_is_list, res_is_list = isinstance(expected, list), isinstance(result, list) 23 | if exp_is_dict and not res_is_dict: 24 | differences.append(("Expected is dict but not Result", path)) 25 | elif res_is_dict and not exp_is_dict: 26 | differences.append(("Result is dict but not Expected", path)) 27 | elif not exp_is_dict and not res_is_dict: 28 | if exp_is_list and res_is_list: 29 | for i in range(max(len(expected), len(result))): 30 | if i >= len(result): 31 | differences.append( 32 | (f"Result missing element {expected[i]}", path + sep + str(i)) 33 | ) 34 | elif i >= len(expected): 35 | differences.append( 36 | ( 37 | f"Result contains extra element {result[i]}", 38 | path + sep + str(i), 39 | ) 40 | ) 41 | else: 42 | differences += get_differences( 43 | expected[i], result[i], path + sep + str(i) 44 | ) 45 | elif expected != result: 46 | differences.append((f"Expected={expected}, Result={result}", path)) 47 | else: 48 | exp_keys, res_keys = set(expected.keys()), set(result.keys()) 49 | in_exp_not_res, in_res_not_exp = exp_keys - res_keys, res_keys - exp_keys 50 | 51 | for k in in_exp_not_res: 52 | differences.append(("In Expected, not in Result", path + sep + k)) 53 | for k in in_res_not_exp: 54 | differences.append(("In Result, not in Expected", path + sep + k)) 55 | 56 | for k in exp_keys & res_keys: 57 | differences += get_differences(expected[k], result[k], path + sep + k) 58 | 59 | return differences 60 | 61 | 62 | def print_and_test(expected, result=None, actual=None): 63 | """Print objects and differences, then test equality.""" 64 | pp = pprint.PrettyPrinter(indent=2) 65 | if actual is not None: 66 | result = actual 67 | 68 | print("\nExpected:") 69 | pp.pprint(expected) 70 | 71 | print("\nActual:") 72 | pp.pprint(result) 73 | 74 | print("\nDifferences:") 75 | print("\n".join([" - ".join(v) for v in get_differences(expected, result)])) 76 | 77 | assert result == expected 78 | 79 | 80 | class MockDryRunContext: 81 | """Mock DryRunContext.""" 82 | 83 | def __init__( 84 | self, 85 | cls, 86 | use_cloud_function=False, 87 | id_token=None, 88 | credentials=None, 89 | ): 90 | """Initialize dry run instance.""" 91 | self.use_cloud_function = use_cloud_function 92 | self.id_token = id_token 93 | self.credentials = credentials 94 | self.cls = cls 95 | 96 | def create( 97 | self, 98 | sql=None, 99 | project="moz-fx-data-shared-prod", 100 | dataset=None, 101 | table=None, 102 | ): 103 | """Initialize passed MockDryRun instance.""" 104 | return self.cls( 105 | use_cloud_function=self.use_cloud_function, 106 | id_token=self.id_token, 107 | credentials=self.credentials, 108 | sql=sql, 109 | project=project, 110 | dataset=dataset, 111 | table=table, 112 | ) 113 | 114 | 115 | class MockDryRun: 116 | """Mock dryrun.DryRun.""" 117 | 118 | def __init__( 119 | self, 120 | use_cloud_function, 121 | id_token, 122 | credentials, 123 | sql=None, 124 | project=None, 125 | dataset=None, 126 | table=None, 127 | ): 128 | """Create MockDryRun instance.""" 129 | self.sql = sql 130 | self.project = project 131 | self.dataset = dataset 132 | self.table = table 133 | 
134 |         self.credentials = credentials
135 |         self.id_token = id_token
136 | 
--------------------------------------------------------------------------------
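
Usage sketch for the helpers in tests/utils.py (illustrative only, not a file in the repository; assumes the repository root is on the import path). get_differences walks two JSON-like objects and returns (message, path) pairs, and print_and_test pretty-prints both objects and their differences before asserting equality:

# Illustrative example; the input dicts below are made up.
from tests.utils import get_differences, print_and_test

expected = {"views": [{"name": "events", "type": "count"}]}
actual = {"views": [{"name": "events", "type": "count_distinct"}]}

# Each unequal leaf is reported together with its dotted path:
# [('Expected=count, Result=count_distinct', '.views.0.type')]
print(get_differences(expected, actual))

# Prints both objects plus the differences, then fails the equality assert.
print_and_test(expected, actual=actual)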
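The MockDryRunContext / MockDryRun pair mirrors the generator's dry-run interface: a view under test receives the context (as in test_events.py's events_view.to_lookml(None, dryrun=mock_dryrun)) and calls its create() factory to obtain a per-table dry run, which is then asked for the table schema. A minimal sketch of that wiring; the subclass name and canned schema here are hypothetical:

# Illustrative example; MockDryRunExample is not a class in the repository.
from tests.utils import MockDryRun, MockDryRunContext

class MockDryRunExample(MockDryRun):
    """Return a canned schema instead of dry-running a BigQuery query."""

    def get_table_schema(self):
        return [{"name": "client_id", "type": "STRING"}]

ctx = MockDryRunContext(MockDryRunExample, use_cloud_function=False)
dry_run = ctx.create(dataset="glean_app", table="baseline")
assert dry_run.project == "moz-fx-data-shared-prod"  # create()'s default project
assert dry_run.get_table_schema() == [{"name": "client_id", "type": "STRING"}]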