├── .circleci ├── config.yml └── post-diff.js ├── .dockerignore ├── .flake8 ├── .github └── dependabot.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .yamllint.yaml ├── CODEOWNERS ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── architecture ├── lookml.jpg ├── namespaces.jpg └── namespaces_yaml.md ├── bin ├── dev_branches ├── generate └── generator ├── custom-namespaces.yaml ├── docker-compose.yml ├── generator ├── __init__.py ├── __main__.py ├── dashboards │ ├── __init__.py │ ├── dashboard.py │ ├── operational_monitoring_dashboard.py │ └── templates │ │ └── dashboard.lkml ├── dryrun.py ├── explores │ ├── __init__.py │ ├── client_counts_explore.py │ ├── events_explore.py │ ├── explore.py │ ├── funnel_analysis_explore.py │ ├── glean_ping_explore.py │ ├── growth_accounting_explore.py │ ├── metric_definitions_explore.py │ ├── operational_monitoring_explore.py │ ├── ping_explore.py │ └── table_explore.py ├── lkml_update.py ├── lookml.py ├── metrics_utils.py ├── namespaces.py ├── operational_monitoring_utils.py ├── spoke.py ├── utils.py └── views │ ├── __init__.py │ ├── client_counts_view.py │ ├── datagroups.py │ ├── events_view.py │ ├── funnel_analysis_view.py │ ├── glean_ping_view.py │ ├── growth_accounting_view.py │ ├── lookml_utils.py │ ├── metric_definitions_view.py │ ├── operational_monitoring_alerting_view.py │ ├── operational_monitoring_view.py │ ├── ping_view.py │ ├── table_view.py │ └── view.py ├── namespaces-disallowlist.yaml ├── pytest.ini ├── requirements.in ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── data └── metric-hub │ └── definitions │ └── fenix.toml ├── test_datagroups.py ├── test_events.py ├── test_funnel_analysis.py ├── test_glean_ping_view.py ├── test_integration.py ├── test_lookml.py ├── test_lookml_utils.py ├── test_namespaces.py ├── test_operational_monitoring.py ├── test_spoke.py └── utils.py /.circleci/post-diff.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | // A script for posting the generated LookML diff to Github from CircleCI. 3 | // This requires GH_AUTH_TOKEN to be set up, along-side CircleCI specific 4 | // variables. See the source at [1] for more details. 5 | // https://github.com/themadcreator/circle-github-bot/blob/master/src/index.ts 6 | 7 | const fs = require("fs"); 8 | const bot = require("circle-github-bot").create(); 9 | const { graphql } = require("@octokit/graphql"); 10 | const path = require("path"); 11 | 12 | const diff_file = "lookml.diff"; 13 | const graphql_authorized = graphql.defaults({ 14 | headers: { 15 | authorization: `token ${process.env.GH_AUTH_TOKEN}`, 16 | }, 17 | }); 18 | // Github comments can have a maximum length of 65536 characters 19 | const max_content_length = 65000; 20 | 21 | async function minimize_pr_diff_comments() { 22 | if (!process.env.CIRCLE_PULL_REQUEST) { 23 | return; 24 | } 25 | const { viewer } = await graphql_authorized( 26 | `query { 27 | viewer { 28 | login 29 | } 30 | }` 31 | ); 32 | const { repository } = await graphql_authorized( 33 | `query($repo_owner:String!, $repo_name:String!, $pr_number:Int!) 
{ 34 | repository(owner: $repo_owner, name: $repo_name) { 35 | pullRequest(number: $pr_number) { 36 | comments(last: 100) { 37 | nodes { 38 | id 39 | author { 40 | login 41 | } 42 | bodyText 43 | isMinimized 44 | } 45 | } 46 | } 47 | } 48 | }`, 49 | { 50 | repo_owner: process.env.CIRCLE_PROJECT_USERNAME, 51 | repo_name: process.env.CIRCLE_PROJECT_REPONAME, 52 | pr_number: parseInt(path.basename(process.env.CIRCLE_PULL_REQUEST)), 53 | } 54 | ); 55 | for (const comment of repository.pullRequest.comments.nodes) { 56 | if ( 57 | comment.author.login === viewer.login 58 | && comment.bodyText.includes(diff_file) 59 | && !comment.isMinimized 60 | ) { 61 | console.log(`Minimizing comment ${comment.id}.`); 62 | await graphql_authorized( 63 | `mutation($comment_id:ID!) { 64 | minimizeComment(input: {subjectId: $comment_id, classifier: OUTDATED}) { 65 | clientMutationId 66 | } 67 | }`, 68 | { 69 | comment_id: comment.id, 70 | } 71 | ); 72 | } 73 | } 74 | } 75 | 76 | function diff() { 77 | let root = "/tmp/workspace/"; 78 | let diff_content = fs.readFileSync(root + "/" + diff_file, "utf8"); 79 | 80 | var body = "No content detected."; 81 | var warnings = ""; 82 | 83 | if (diff_content) { 84 | if (diff_content.length > max_content_length) { 85 | diff_content = diff_content.substring(0, max_content_length); 86 | warnings = "⚠️ Only part of the diff is displayed." 87 | } 88 | body = `
<details>
 89 | <summary>Click to expand!</summary>
 90 | 
 91 | \`\`\`diff
 92 | ${diff_content}
 93 | \`\`\`
 94 | 
 95 | </details>
96 | 97 | ${warnings} 98 | 99 | [Link to full diff](https://output.circle-artifacts.com/output/job/${process.env.CIRCLE_WORKFLOW_JOB_ID}/artifacts/${process.env.CIRCLE_NODE_INDEX}/${diff_file}) 100 | ` 101 | } 102 | var content = `#### \`${diff_file}\` 103 | ${body} 104 | `; 105 | return content; 106 | } 107 | 108 | function post_diff() { 109 | bot.comment( 110 | process.env.GH_AUTH_TOKEN, 111 | `### Integration report for "${bot.env.commitMessage}" 112 | ${diff()} 113 | ` 114 | ); 115 | } 116 | 117 | minimize_pr_diff_comments().then(post_diff); 118 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ### A list of files and file patterns to be excluded from the Dockerfile 2 | 3 | # 3rd-party configuration 4 | .circleci 5 | .github 6 | .vscode 7 | .gitignore 8 | 9 | # Config files 10 | .pre-commit-config.yaml 11 | 12 | # Docker artifacts 13 | Dockerfile 14 | docker-compose.yml 15 | 16 | # Python artifacts 17 | requirements.in 18 | *.pyc 19 | 20 | # Other 21 | Makefile 22 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203, W503 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: pip 5 | directory: / 6 | schedule: 7 | interval: daily 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.swo 4 | *.egg-info/ 5 | .DS_Store 6 | .vscode 7 | .mypy_cache/ 8 | .probe_cache 9 | venv/ 10 | namespaces.yaml 11 | looker-hub/ 12 | .env 13 | .vscode 14 | .python-version 15 | 16 | build/* 17 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile = black 3 | skip = venv 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: no-commit-to-branch 7 | args: [--branch, main] 8 | - repo: https://github.com/psf/black 9 | rev: 24.3.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/PyCQA/flake8 13 | rev: 7.1.1 14 | hooks: 15 | - id: flake8 16 | - repo: https://github.com/PyCQA/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | - repo: https://github.com/PyCQA/pydocstyle 21 | rev: 6.3.0 22 | hooks: 23 | - id: pydocstyle 24 | exclude: (.*/)?test_.*\.py 25 | - repo: https://github.com/pre-commit/mirrors-mypy 26 | rev: v1.5.1 27 | hooks: 28 | - id: mypy 29 | additional_dependencies: 30 | - types-PyYAML 31 | - repo: https://github.com/adrienverge/yamllint 32 | rev: v1.37.0 33 | hooks: 34 | - id: yamllint 35 | args: [-c, .yamllint.yaml, .] 
36 | 
--------------------------------------------------------------------------------
/.yamllint.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | rules:
 3 |   line-length:
 4 |     allow-non-breakable-words: true
 5 |     allow-non-breakable-inline-mappings: true
 6 |   indentation:
 7 |     spaces: consistent
 8 |     indent-sequences: true
 9 |     check-multi-line-strings: false
10 | 
11 | ignore: |
12 |   .git/
13 |   venv/
14 |   .circleci/
15 |   namespaces.yaml
16 |   looker-hub/namespaces.yaml
17 | 
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Dependency updates (via dependabot)
2 | requirements.in @mozilla/data-looker
3 | requirements.txt @mozilla/data-looker
4 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | LABEL maintainer="kignasiak@mozilla.com"
 4 | 
 5 | ENV USER_ID="10001"
 6 | ENV GROUP_ID="app"
 7 | ENV HOME="/app"
 8 | 
 9 | RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \
10 |     useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /app ${GROUP_ID}
11 | 
12 | # For grpc https://github.com/grpc/grpc/issues/24556#issuecomment-751797589
13 | RUN apt-get update -qqy && \
14 |     apt-get install -qqy build-essential git curl software-properties-common
15 | RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
16 |     && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
17 |     && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null
18 | RUN apt update
19 | RUN apt install -y gh
20 | 
21 | COPY --from=google/cloud-sdk:339.0.0-alpine /google-cloud-sdk /google-cloud-sdk
22 | ENV PATH /google-cloud-sdk/bin:$PATH
23 | 
24 | WORKDIR ${HOME}
25 | 
26 | COPY requirements.txt .
27 | 
28 | RUN pip install --upgrade pip \
29 |     && pip install --no-deps --no-cache-dir -r requirements.txt \
30 |     && rm requirements.txt
31 | 
32 | COPY . ./lookml-generator
33 | RUN pip install --no-dependencies --no-cache-dir -e ./lookml-generator
34 | ENV PATH $PATH:${HOME}/lookml-generator/bin
35 | 
36 | RUN chown -R ${USER_ID}:${GROUP_ID} ${HOME}
37 | USER ${USER_ID}
38 | 
39 | ENTRYPOINT ["generate"]
40 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: help build run shell
 2 | 
 3 | help:
 4 | 	@echo " build Builds the docker images for the docker-compose setup."
 5 | 	@echo " run Runs a command."
 6 | 	@echo " shell Opens a bash shell."
 7 | 
 8 | build:
 9 | 	docker-compose build
10 | 
11 | run:
12 | 	docker-compose run app $(COMMAND)
13 | 
14 | shell:
15 | 	docker-compose run --entrypoint /bin/bash app
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # lookml-generator
2 | [![mozilla](https://circleci.com/gh/mozilla/lookml-generator.svg?style=svg)](https://circleci.com/gh/mozilla/lookml-generator/?branch=main)
3 | 
4 | LookML Generator for Glean and Mozilla Data.
5 | 
6 | The lookml-generator has two important roles:
7 | 1. Generate a listing of all Glean/Mozilla namespaces and their associated BigQuery tables
8 | 2. From that listing, generate LookML for views, explores, and dashboards and push those to the [Looker Hub project](https://github.com/mozilla/looker-hub)
9 | 
10 | ## Generating Namespace Listings
11 | 
12 | At Mozilla, a namespace is a single functional area that is represented in Looker with (usually) one model*.
13 | Each Glean application is self-contained within a single namespace, containing the data from [across that application's channels](https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings).
14 | We also support custom namespaces, which can use wildcards to denote their BigQuery datasets and tables. These are described in `custom-namespaces.yaml`.
15 | 
16 | ![alt text](https://github.com/mozilla/lookml-generator/blob/main/architecture/namespaces.jpg?raw=true)
17 | 
18 | > \* Though namespaces are not limited to a single model, we advise it for clarity's sake.
19 | 
20 | ## Adding Custom Namespaces
21 | Custom namespaces need to be defined explicitly in `custom-namespaces.yaml`. For each namespace, the views and explores to be generated need to be specified.
22 | 
23 | Make sure the custom namespace is _not_ listed in `namespaces-disallowlist.yaml`.
24 | 
25 | Once changes have been approved and merged, the [lookml-generator changes can get deployed](#deploying-new-lookml-generator-changes).
26 | 
27 | ## Generating LookML
28 | Once we know which tables are associated with which namespaces, we can generate LookML files and update our Looker instance.
29 | 
30 | Lookml-generator generates LookML based on both the BigQuery schema and manual changes. For example, we would want to add `city` drill-downs for all `country` fields.
31 | ![alt text](https://github.com/mozilla/lookml-generator/blob/main/architecture/lookml.jpg?raw=true)
32 | 
33 | 
34 | ### Pushing Changes to Dev Branches
35 | In addition to pushing new LookML to the [main branch](https://github.com/mozilla/looker-hub), we reset the dev branches to also
36 | point to the commit at `main`. This only happens during production deployment runs.
37 | 
38 | To automate this process for your dev branch, add it to [this file](https://github.com/mozilla/lookml-generator/tree/main/bin/dev_branches).
39 | You can edit that file in your browser. Open a PR and tag [data-looker](https://github.com/orgs/mozilla/teams/data-looker) for review.
40 | You can find your dev branch by going to [Looker](https://mozilla.cloud.looker.com), entering development mode, opening the [`looker-hub`](https://mozilla.cloud.looker.com/projects/looker-hub)
41 | project, clicking the "Git Actions" icon, and finding your personal branch in the "Current Branch" dropdown.
42 | 
43 | ## Setup
44 | 
45 | Ensure Python 3.10+ is available on your machine (see [this guide](https://docs.python-guide.org/starting/install3/osx/) for instructions if you're on a Mac and haven't installed anything other than the default system Python).
46 | 
47 | You will also need the Google Cloud SDK with valid credentials.
48 | After setting up the Google Cloud SDK, run:
49 | 
50 | ```bash
51 | gcloud config set project moz-fx-data-shared-prod
52 | gcloud auth login --update-adc
53 | ```
54 | 
55 | Install requirements in a Python venv:
56 | ```bash
57 | python3.10 -m venv venv/
58 | venv/bin/pip install --no-deps -r requirements.txt
59 | ```
60 | 
61 | Update requirements when they change with `pip-sync`:
62 | ```bash
63 | venv/bin/pip-sync
64 | ```
65 | 
66 | Set up pre-commit hooks:
67 | ```bash
68 | venv/bin/pre-commit install
69 | ```
70 | 
71 | Run unit tests and linters:
72 | ```bash
73 | venv/bin/pytest
74 | ```
75 | 
76 | Run integration tests:
77 | ```bash
78 | venv/bin/pytest -m integration
79 | ```
80 | 
81 | Note that the integration tests require a valid login to BigQuery to succeed.
82 | 
83 | ## Testing generation locally
84 | 
85 | You can test namespace generation by running:
86 | 
87 | ```bash
88 | ./bin/generator namespaces
89 | ```
90 | 
91 | To generate the actual LookML (in `looker-hub`), run:
92 | 
93 | ```bash
94 | ./bin/generator lookml
95 | ```
96 | 
97 | ## Container Development
98 | 
99 | Most code changes will not require changes to the generation script or container.
100 | However, you can test it locally. The following script will test generation, pushing
101 | a new branch to the `looker-hub` repository:
102 | 
103 | ```
104 | export HUB_BRANCH_PUBLISH="yourname-generation-test-1"
105 | export GIT_SSH_KEY_BASE64=$(cat ~/.ssh/id_rsa | base64)
106 | make build && make run
107 | ```
108 | 
109 | ## Deploying new `lookml-generator` changes
110 | 
111 | `lookml-generator` runs daily to update the `looker-hub` and `looker-spoke-default` code. Changes
112 | to the underlying tables should automatically propagate to their respective views and explores.
113 | 
114 | Airflow updates the two repositories [each morning](https://github.com/mozilla/telemetry-airflow/blob/main/dags/probe_scraper.py#L320).
115 | If you need your changes deployed quickly, wait for the container to build after you merge to
116 | `main`, and re-run the task in Airflow (`lookml_generator`, in the `probe_scraper` DAG).
117 | 
118 | ## `generate` Command Explained - High Level Explanation
119 | 
120 | When `make run` is executed, a Docker container is spun up using the latest `lookml-generator` Docker image on your machine and runs the [`generate` script](bin/generate) using configuration defined at the top of the script unless [overridden using environment variables](./docker-compose.yml#L13-L25) (see the [Container Development](#container-development) section above).
121 | 
122 | Next, the process authenticates with GitHub, clones the [`looker-hub` repository](https://github.com/mozilla/looker-hub), and creates the branch defined in the `HUB_BRANCH_PUBLISH` config variable both locally and in the remote. Then it checks out the looker-hub `base` branch and pulls it from the remote.
123 | 
124 | Once the setup is done, the process generates `namespaces.yaml` and uses it to generate LookML code. A git diff is executed to ensure that the files that already exist in the `base` branch are not being modified. If changes are detected, the process exits with an error code. Otherwise, it proceeds to create a commit and push it to the remote dev branch created earlier.
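
For reference, the generation step above boils down to two CLI calls. The sketch below is condensed from the [`generate` script](bin/generate); the URIs and file names shown are that script's defaults, and the script itself additionally handles branch setup and the diff check:

```bash
# Build namespaces.yaml from the Glean app listings plus custom-namespaces.yaml,
# skipping anything listed in namespaces-disallowlist.yaml.
lookml-generator namespaces \
    --custom-namespaces custom-namespaces.yaml \
    --generated-sql-uri "https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz" \
    --app-listings-uri "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings" \
    --disallowlist namespaces-disallowlist.yaml

# Use the resulting namespaces.yaml to generate LookML views, explores,
# and dashboards into the looker-hub checkout.
lookml-generator lookml \
    --namespaces namespaces.yaml \
    --target-dir looker-hub
```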
125 | 
126 | When following the `Container Development` steps, the entire process results in a dev branch in `looker-hub` with brand-new generated LookML code, which can be tested by going to Looker, switching to development mode, and selecting the dev branch just created/updated by this command. This will result in Looker using the brand-new LookML code just generated. Otherwise, changes merged into `main` in this repo will become available on looker-hub `main` when the scheduled Airflow job runs.
127 | 
--------------------------------------------------------------------------------
/architecture/lookml.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/lookml-generator/684c39ac39a605cca4c76738d79795df89bd237a/architecture/lookml.jpg
--------------------------------------------------------------------------------
/architecture/namespaces.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/lookml-generator/684c39ac39a605cca4c76738d79795df89bd237a/architecture/namespaces.jpg
--------------------------------------------------------------------------------
/architecture/namespaces_yaml.md:
--------------------------------------------------------------------------------
 1 | # `namespaces.yaml`
 2 | 
 3 | We use [`namespaces.yaml`](https://github.com/mozilla/looker-hub/blob/main/namespaces.yaml) as the declarative listing of the Looker namespaces generated by this repository.
 4 | 
 5 | Each entry in `namespaces.yaml` represents a namespace, and has the following properties:
 6 | 
 7 | - `owners` (string): The owners are the people who will have control over the associated Namespace folder in Looker. It is up to them to decide which dashboards to "promote" to their shared folder.
 8 | - `pretty_name` (string): The pretty name is used in most places where the namespace's name is seen, e.g. in the explore drop-down and folder name.
 9 | - `glean_app` (bool): Whether or not this namespace represents a Glean Application.
10 | - `connection` (optional string): The database connection to use, as named in Looker. Defaults to `telemetry`.
11 | - `views` (object): The LookML View files that will be generated. More detailed info below.
12 | - `explores` (object): The LookML Explore files that will be generated. More detailed info below.
13 | 
14 | ## `views`
15 | 
16 | Each View entry is actually a LookML view file that will be generated.
17 | Each LookML View file can contain multiple Looker Views; the idea here is that these views are related and used together. By convention, the first view in the file is the base view (i.e. associated join views follow after the explore containing the base dimension and metrics).
18 | 
19 | - `type`: The type of the view, e.g. `glean_ping_view`.
20 | - `tables`: This field is used in a few ways, depending on the associated View type.
21 | 
22 | For `GleanPingView` and `PingView`, `tables` represents all of the associated channels for that view. Each table will have a `channel` and `table` entry. Only a single view will be created in the LookML file.
23 | 
24 | ```yaml
25 | tables:
26 | - channel: release
27 |   table: mozdata.org_mozilla_firefox.metrics
28 | - channel: nightly
29 |   table: mozdata.org_mozilla_fenix.metrics
30 | ```
31 | 
32 | For `ClientCountView` and `GrowthAccountingView`, `tables` will have a single entry, with the name of the table the Looker View is based off of. Only a single Looker View will be created.
33 | 
34 | ```yaml
35 | tables:
36 | - table: mozdata.org_mozilla_firefox.baseline_clients_last_seen
37 | ```
38 | 
39 | For `FunnelAnalysisView`, only the first list entry is used; inside that entry, each value represents a Looker View that is created. The key is the name of the view; the value is the Looker View or BQ View it is derived from.
40 | In the following example, 4 views will be created in the view file: `funnel_analysis`, `event_types`, `event_type_1` and `event_type_2`.
41 | 
42 | ```yaml
43 | tables:
44 | - funnel_analysis: events_daily_table
45 |   event_types: `mozdata.glean_app.event_types`
46 |   event_type_1: event_types
47 |   event_type_2: event_types
48 | ```
49 | 
50 | ## `explores`
51 | 
52 | Each Explore entry is a single file, sometimes containing multiple explores within it (mainly for things like changing suggestions).
53 | 
54 | - `type` - The type of the explore, e.g. `growth_accounting_explore`.
55 | - `views` - The views that this is based on. Generally, the allowed keys here are:
56 |   - `base_view`: The base view is the one we are basing this Explore on, using [`view_name`](https://docs.looker.com/reference/explore-params/view_name).
57 |   - `extended_view*`: Any views we include in the `base_view` are added as these. It could be one (`extended_view`) or multiple (`extended_view_1`).
58 |   - `joined_view*`: Any other view we are going to join to this one. _This is only required if the joined view is not defined in the same view file as `base_view`._
59 | 
60 | It may not necessarily be desirable to list all of the views and explores in `namespaces.yaml` (e.g. suggest explores specific to a view). In these cases, it is useful to adopt the convention where the first view is the primary view for the explore.
--------------------------------------------------------------------------------
/bin/dev_branches:
--------------------------------------------------------------------------------
1 | dev-frank-bertsch-t9kx
2 | 
--------------------------------------------------------------------------------
/bin/generate:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # A script for generating `namespaces.yaml` and the associated LookML.
 4 | # This repository builds namespaces.yaml from Glean applications and
 5 | # `custom-namespaces.yaml`, and then generates files and LookML that
 6 | # match the specification in `namespaces.yaml` and table definitions
 7 | # in BigQuery. The resulting LookML is pushed to our `looker-hub`
 8 | # repository.
 9 | #
10 | # Environment variables:
11 | #   GIT_SSH_KEY_BASE64: A base64-encoded ssh secret key with permissions to push
12 | #                       to looker-hub and looker-spoke-default.
13 | #   HUB_REPO_URL: The URL to the looker-hub repository.
14 | #                 Requires the SSH format, e.g. git@github.com:user/repo.git.
15 | #   HUB_BRANCH_SOURCE: The source branch for generating LookML.
16 | #                      Defaults to 'base'. Files present in the source
17 | #                      branch will remain unchanged by generation.
18 | #   HUB_BRANCH_PUBLISH: The destination branch for publishing LookML.
19 | #                       Defaults to 'main-nonprod'. If the
20 | #                       branch doesn't exist, creates it from source.
21 | #   SPOKE_REPO_URL: The URL to the looker-spoke-default repository.
22 | #                   Requires the SSH format, e.g. git@github.com:user/repo.git.
23 | #   PRIVATE_SPOKE_REPO_URL: The URL to the looker-spoke-private repository.
24 | #                           Requires the SSH format, e.g. git@github.com:user/repo.git.
25 | #   SPOKE_BRANCH_WORKING: The working branch for the spoke project.
26 | # This is the branch that changes will be 27 | # published to, and a PR opened to merge 28 | # them in to SPOKE_BRANCH_PUBLISH. 29 | # Defaults to SPOKE_BRANCH_PUBLISH-working. 30 | # SPOKE_BRANCH_PUBLISH: The publish branch for the spoke project. 31 | # A PR to merge the changes from SPOKE_BRANCH_WORKING 32 | # to here will be opened. This branch also serves as 33 | # the source branch. 34 | # LOOKER_INSTANCE_URI: The URI of our looker instance. Defaults to dev. 35 | # LOOKER_API_CLIENT_ID: Client ID for Looker access. If unset, does 36 | # not run `generator content`. 37 | # LOOKER_API_CLIENT_SECRET: Client Secret for Looker access. If unset, does 38 | # not run `generator content`. 39 | # GITHUB_ACCESS_TOKEN: Access token for Github. Needs read and write 40 | # access to repos. Not required in dev. 41 | # UPDATE_DEV_BRANCHES: Whether or not the dev branches should be updated. 42 | # This should only happen in production. 43 | # UPDATE_SPOKE_BRANCHES: Whether or not pull-requests should be opened against 44 | # branches in spoke projects. 45 | # This should only happen in production. 46 | # 47 | # Example usage: 48 | # export GIT_SSH_KEY_BASE64=$(cat ~/.ssh/id_rsa | base64) 49 | # make build && make run 50 | 51 | HUB_REPO_URL=${HUB_REPO_URL:-"git@github.com:mozilla/looker-hub.git"} 52 | HUB_BRANCH_SOURCE=${HUB_BRANCH_SOURCE:-"base"} 53 | HUB_BRANCH_PUBLISH=${HUB_BRANCH_PUBLISH:-"main-nonprod"} 54 | 55 | SPOKE_REPO_URL=${SPOKE_REPO_URL:-"git@github.com:mozilla/looker-spoke-default.git"} 56 | PRIVATE_SPOKE_REPO_URL=${PRIVATE_SPOKE_REPO_URL:-"git@github.com:mozilla/looker-spoke-private.git"} 57 | SPOKE_BRANCH_PUBLISH=${SPOKE_BRANCH_PUBLISH:-"main-nonprod"} 58 | SPOKE_BRANCH_WORKING=${SPOKE_BRANCH_WORKING:-"${SPOKE_BRANCH_PUBLISH}-working"} 59 | 60 | LOOKER_INSTANCE_URI=${LOOKER_INSTANCE_URI:-"https://mozilladev.cloud.looker.com"} 61 | UPDATE_DEV_BRANCHES=${UPDATE_DEV_BRANCHES:-"false"} 62 | UPDATE_SPOKE_BRANCHES=${UPDATE_SPOKE_BRANCHES:-"false"} 63 | 64 | function setup_git_auth() { 65 | # Configure the container for pushing to github. 66 | 67 | if [[ -z "$GIT_SSH_KEY_BASE64" ]]; then 68 | echo "Missing secret key" 1>&2 69 | exit 1 70 | fi 71 | 72 | git config --global user.name "Generated LookML Creator" 73 | git config --global user.email "dataops+looker@mozilla.com" 74 | 75 | mkdir -p "$HOME/.ssh" 76 | 77 | echo "$GIT_SSH_KEY_BASE64" | base64 --decode > "$HOME"/.ssh/id_ed25519 78 | # Makes the future git-push non-interactive 79 | ssh-keyscan github.com > "$HOME"/.ssh/known_hosts 80 | 81 | chown -R "$(id -u):$(id -g)" "$HOME/.ssh" 82 | chmod 700 "$HOME/.ssh" 83 | chmod 700 "$HOME/.ssh/id_ed25519" 84 | 85 | # add private key to the ssh agent to prompt for password once 86 | eval "$(ssh-agent)" 87 | ssh-add 88 | } 89 | 90 | function setup_github_auth() { 91 | # Configure Github CLI auth. 92 | 93 | if [[ -z "$GITHUB_ACCESS_TOKEN" ]]; then 94 | gh auth login 95 | else 96 | gh auth login --with-token <<< "$GITHUB_ACCESS_TOKEN" 97 | fi 98 | } 99 | 100 | function setup_hub() { 101 | # Checkout looker-hub and changes directory to prepare for 102 | # LookML generation. 103 | pushd . 
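  # NB: a bare `pushd .` saves the current directory on the directory stack
  # without changing it; the matching `popd` at the end of this function
  # returns there. The same pattern is used by the other setup and
  # generate functions in this script.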
104 | 105 | cd /app 106 | [[ -d looker-hub ]] && rm -rf looker-hub 107 | git clone "$HUB_REPO_URL" 108 | cd looker-hub 109 | git fetch --all 110 | # If publish branch doesn't exist, create it from main 111 | git checkout "$HUB_BRANCH_PUBLISH" || (git checkout main && git checkout -b "$HUB_BRANCH_PUBLISH") 112 | git checkout "$HUB_BRANCH_SOURCE" 113 | 114 | popd 115 | } 116 | 117 | function setup_spoke() { 118 | # Checkout looker-spoke-default and changes directory to prepare for 119 | # LookML generation. Create publish branch if non-existent. 120 | pushd . 121 | spoke="$1" 122 | spoke_url="$2" 123 | 124 | cd /app 125 | [[ -d $spoke ]] && rm -rf $spoke 126 | git clone "$spoke_url" 127 | cd $spoke 128 | git fetch --all 129 | git checkout $SPOKE_BRANCH_PUBLISH || (git checkout main && git checkout -b $SPOKE_BRANCH_PUBLISH) 130 | git branch -D $SPOKE_BRANCH_WORKING || true # delete working branch if it exists 131 | git checkout -b $SPOKE_BRANCH_WORKING 132 | 133 | popd 134 | } 135 | 136 | function setup_spokes() { 137 | setup_spoke "looker-spoke-default" $SPOKE_REPO_URL 138 | setup_spoke "looker-spoke-private" $PRIVATE_SPOKE_REPO_URL 139 | } 140 | 141 | function check_files_and_commit() { 142 | # Add the new files and commit. 143 | # Use interactive mode to add untracked files 144 | # This also works when it's untracked directories 145 | echo -e "a\n*\nu\n*\nq\n"|git add -i 146 | git commit -m "Auto-push from LookML generation" \ 147 | || echo "Nothing to commit" 148 | } 149 | 150 | function generate_hub_commit() { 151 | # Generate commit on publish branch with 152 | # generated LookML. 153 | 154 | pushd . 155 | cd /app 156 | 157 | HUB_DIR="looker-hub" 158 | NAMESPACE_DISALLOWLIST="/app/lookml-generator/namespaces-disallowlist.yaml" 159 | CUSTOM_NAMESPACES_FILENAME="/app/lookml-generator/custom-namespaces.yaml" 160 | GENERATED_SQL_URI="https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz" 161 | APP_LISTINGS_URI="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings" 162 | 163 | # Generate namespaces.yaml and LookML 164 | lookml-generator namespaces \ 165 | --custom-namespaces $CUSTOM_NAMESPACES_FILENAME \ 166 | --generated-sql-uri $GENERATED_SQL_URI \ 167 | --app-listings-uri $APP_LISTINGS_URI \ 168 | --disallowlist $NAMESPACE_DISALLOWLIST 169 | lookml-generator lookml \ 170 | --namespaces "namespaces.yaml" \ 171 | --target-dir $HUB_DIR 172 | 173 | cd $HUB_DIR 174 | 175 | check_files_and_commit 176 | 177 | # Checkout main. Match it with source branch. 178 | git checkout "$HUB_BRANCH_PUBLISH" 179 | find . -mindepth 1 -maxdepth 1 -not -name .git -exec rm -rf {} + 180 | git checkout "$HUB_BRANCH_SOURCE" -- * 181 | git commit --all \ 182 | --message "Auto-push from LookML generation" \ 183 | || echo "Nothing to commit" 184 | 185 | popd 186 | } 187 | 188 | function update_dev_branches() { 189 | # Reset all dev branches to main 190 | 191 | pushd . 192 | cd /app/looker-hub 193 | 194 | dev_branches_file="/app/lookml-generator/bin/dev_branches" 195 | while read branch; do 196 | git checkout $branch 197 | git reset --hard main 198 | git push -f origin $branch 199 | done < $dev_branches_file 200 | 201 | popd 202 | } 203 | 204 | function generate_spoke_commits() { 205 | # Generate commit on spoke publish branch 206 | # with generated LookML. 207 | 208 | pushd . 
209 | cd /app 210 | 211 | lookml-generator update-spoke \ 212 | --namespaces "namespaces.yaml" \ 213 | --spoke-dir "/app" 214 | 215 | cd "/app/looker-spoke-default" 216 | check_files_and_commit 217 | 218 | cd "/app/looker-spoke-private" 219 | check_files_and_commit 220 | 221 | popd 222 | } 223 | 224 | function hit_looker_webhooks() { 225 | # These webhooks ensure production is up-to-date. 226 | # See https://help.looker.com/hc/en-us/articles/360001288268-Deploy-Webhook-Pulling-From-Remote-Git-Repository 227 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/looker-hub/deploy" 228 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/spoke-default/deploy" 229 | curl "$LOOKER_INSTANCE_URI/webhooks/projects/spoke-private/deploy" 230 | } 231 | 232 | function push_and_open_spoke_pull_request() { 233 | # Open a PR to merge the changes from working branch 234 | # in to the publish branch. 235 | 236 | git push -f || git push -f --set-upstream origin "$SPOKE_BRANCH_WORKING" 237 | 238 | # https://bugzilla.mozilla.org/show_bug.cgi?id=1774030 239 | # could not request reviewer: 'mozilla/data-looker' not found 240 | # git diff --quiet $SPOKE_BRANCH_PUBLISH...$SPOKE_BRANCH_WORKING || 241 | # gh pr create \ 242 | # --title "Auto-push from LookML Generator" \ 243 | # --reviewer mozilla/data-looker \ 244 | # --base $SPOKE_BRANCH_PUBLISH \ 245 | # --head $SPOKE_BRANCH_WORKING \ 246 | # --body "" 247 | 248 | git diff --quiet $SPOKE_BRANCH_PUBLISH...$SPOKE_BRANCH_WORKING || 249 | gh pr create \ 250 | --title "Auto-push from LookML Generator" \ 251 | --base $SPOKE_BRANCH_PUBLISH \ 252 | --head $SPOKE_BRANCH_WORKING \ 253 | --body "" 254 | } 255 | 256 | function main() { 257 | set -e # stop if any statement returns a non-zero exit code 258 | pushd . 259 | cd /app 260 | 261 | set +x # don't print these commands 262 | setup_git_auth 263 | setup_github_auth 264 | 265 | # Set up hub and commit 266 | set -x # print these commands 267 | setup_hub 268 | generate_hub_commit 269 | 270 | # Publish hub 271 | cd /app/looker-hub 272 | git push || git push --set-upstream origin "$HUB_BRANCH_PUBLISH" 273 | 274 | # Update dev branches 275 | if [ "$UPDATE_DEV_BRANCHES" = "true" ] ; then 276 | update_dev_branches 277 | fi 278 | 279 | # Update branches on spoke-default and spoke-private 280 | if [ "$UPDATE_SPOKE_BRANCHES" = "true" ] ; then 281 | # Set up spokes and commit 282 | setup_spokes 283 | generate_spoke_commits 284 | 285 | # Publish spoke - force push to working branch 286 | cd /app/looker-spoke-default 287 | push_and_open_spoke_pull_request 288 | 289 | cd /app/looker-spoke-private 290 | push_and_open_spoke_pull_request 291 | 292 | # Update Looker content 293 | hit_looker_webhooks 294 | fi 295 | 296 | popd 297 | } 298 | 299 | main "$@" 300 | -------------------------------------------------------------------------------- /bin/generator: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd "$(dirname "$0")/.." 4 | 5 | if [ -e venv ]; then 6 | . venv/bin/activate 7 | fi 8 | 9 | exec python3 -c "from generator import cli; cli(prog_name='$0')" "$@" 10 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | 4 | services: 5 | app: 6 | build: 7 | context: . 
8 | dockerfile: Dockerfile 9 | restart: "no" 10 | command: "true" 11 | volumes: 12 | - ~/.config:/app/.config 13 | environment: 14 | - GIT_SSH_KEY_BASE64 15 | - HUB_REPO_URL 16 | - HUB_BRANCH_SOURCE 17 | - HUB_BRANCH_PUBLISH 18 | - SPOKE_REPO_URL 19 | - SPOKE_BRANCH_PUBLISH 20 | - LOOKER_INSTANCE_URI 21 | - LOOKER_API_CLIENT_ID 22 | - LOOKER_API_CLIENT_SECRET 23 | - GITHUB_ACCESS_TOKEN 24 | - UPDATE_DEV_BRANCHES 25 | - UPDATE_SPOKE_BRANCHES 26 | -------------------------------------------------------------------------------- /generator/__init__.py: -------------------------------------------------------------------------------- 1 | """Generate LookML. 2 | 3 | .. include:: ../README.md 4 | .. include:: ../architecture/namespaces_yaml.md 5 | """ 6 | 7 | __docformat__ = "restructuredtext" 8 | 9 | import sys 10 | import warnings 11 | 12 | import click 13 | from google.auth.exceptions import DefaultCredentialsError 14 | from google.cloud import bigquery 15 | 16 | from .lookml import lookml 17 | from .namespaces import namespaces 18 | from .spoke import update_spoke 19 | 20 | 21 | def is_authenticated(): 22 | """Check if the user is authenticated to GCP.""" 23 | try: 24 | bigquery.Client() 25 | except DefaultCredentialsError: 26 | return False 27 | return True 28 | 29 | 30 | def cli(prog_name=None): 31 | """Generate and run CLI.""" 32 | if not is_authenticated(): 33 | print( 34 | "Authentication to GCP required. Run `gcloud auth login --update-adc` " 35 | "and check that the project is set correctly." 36 | ) 37 | sys.exit(1) 38 | 39 | commands = { 40 | "namespaces": namespaces, 41 | "lookml": lookml, 42 | "update-spoke": update_spoke, 43 | } 44 | 45 | @click.group(commands=commands) 46 | def group(): 47 | """CLI interface for lookml automation.""" 48 | 49 | warnings.filterwarnings( 50 | "ignore", 51 | "Your application has authenticated using end user credentials", 52 | module="google.auth._default", 53 | ) 54 | 55 | group(prog_name=prog_name) 56 | -------------------------------------------------------------------------------- /generator/__main__.py: -------------------------------------------------------------------------------- 1 | """Run lookml_generator cli.""" 2 | 3 | from . 
import cli 4 | 5 | 6 | def main(): 7 | """Run the CLI.""" 8 | cli("generator") 9 | 10 | 11 | if __name__ == "__main__": 12 | main() 13 | -------------------------------------------------------------------------------- /generator/dashboards/__init__.py: -------------------------------------------------------------------------------- 1 | """All possible dashboard types.""" 2 | 3 | from .dashboard import Dashboard # noqa: F401 4 | from .operational_monitoring_dashboard import OperationalMonitoringDashboard 5 | 6 | DASHBOARD_TYPES = { 7 | OperationalMonitoringDashboard.type: OperationalMonitoringDashboard, 8 | } 9 | -------------------------------------------------------------------------------- /generator/dashboards/dashboard.py: -------------------------------------------------------------------------------- 1 | """Generic dashboard type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from typing import Dict, List 7 | 8 | 9 | @dataclass 10 | class Dashboard(object): 11 | """A generic Looker Dashboard.""" 12 | 13 | title: str 14 | name: str 15 | layout: str 16 | namespace: str 17 | tables: List[Dict[str, str]] 18 | type: str = field(init=False) 19 | 20 | def to_dict(self) -> dict: 21 | """Dashboard instance represented as a dict.""" 22 | return { 23 | self.name: { 24 | "title": self.title, 25 | "type": self.type, 26 | "layout": self.layout, 27 | "namespace": self.namespace, 28 | "tables": self.tables, 29 | } 30 | } 31 | 32 | def to_lookml(self): 33 | """Generate Lookml for this dashboard.""" 34 | raise NotImplementedError("Only implemented in subclass.") 35 | -------------------------------------------------------------------------------- /generator/dashboards/operational_monitoring_dashboard.py: -------------------------------------------------------------------------------- 1 | """Class to describe Operational Monitoring Dashboard.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List 6 | 7 | from ..views import lookml_utils 8 | from .dashboard import Dashboard 9 | 10 | 11 | class OperationalMonitoringDashboard(Dashboard): 12 | """An Operational Monitoring dashboard.""" 13 | 14 | type: str = "operational_monitoring_dashboard" 15 | 16 | def __init__( 17 | self, 18 | title: str, 19 | name: str, 20 | layout: str, 21 | namespace: str, 22 | defn: List[Dict[str, Any]], 23 | ): 24 | """Get an instance of a Operational Monitoring Dashboard.""" 25 | self.dimensions = defn[0].get("dimensions", {}) 26 | self.xaxis = defn[0]["xaxis"] 27 | self.compact_visualization = defn[0].get("compact_visualization", False) 28 | self.group_by_dimension = defn[0].get("group_by_dimension", None) 29 | 30 | super().__init__(title, name, layout, namespace, defn) 31 | 32 | @classmethod 33 | def from_dict( 34 | klass, namespace: str, name: str, defn: dict 35 | ) -> OperationalMonitoringDashboard: 36 | """Get a OperationalMonitoringDashboard from a dict representation.""" 37 | title = defn["title"] 38 | return klass(title, name, "newspaper", namespace, defn["tables"]) 39 | 40 | def _map_series_to_colours(self, branches, explore): 41 | colours = [ 42 | "#3FE1B0", 43 | "#0060E0", 44 | "#9059FF", 45 | "#B933E1", 46 | "#FF2A8A", 47 | "#FF505F", 48 | "#FF7139", 49 | "#FFA537", 50 | "#005E5D", 51 | "#073072", 52 | "#7F165B", 53 | "#A7341F", 54 | ] 55 | return {branch: color for branch, color in zip(branches, colours)} 56 | 57 | def to_lookml(self): 58 | """Get this dashboard as LookML.""" 59 | kwargs = { 60 | "name": self.name, 61 | "title": 
self.title, 62 | "layout": self.layout, 63 | "elements": [], 64 | "dimensions": [], 65 | "group_by_dimension": self.group_by_dimension, 66 | "alerts": None, 67 | "compact_visualization": self.compact_visualization, 68 | } 69 | 70 | includes = [] 71 | graph_index = 0 72 | for table_defn in self.tables: 73 | explore = table_defn["explore"] 74 | includes.append( 75 | f"/looker-hub/{self.namespace}/explores/{explore}.explore.lkml" 76 | ) 77 | 78 | if table_defn["table"].endswith("alerts"): 79 | kwargs["alerts"] = { 80 | "explore": explore, 81 | "col": 0, 82 | "date": ( 83 | f"{self.xaxis}_date" if self.xaxis == "build_id" else self.xaxis 84 | ), 85 | } 86 | else: 87 | if len(kwargs["dimensions"]) == 0: 88 | kwargs["dimensions"] = [ 89 | { 90 | "name": name, 91 | "title": lookml_utils.slug_to_title(name), 92 | "default": info["default"], 93 | "options": info["options"], 94 | } 95 | for name, info in self.dimensions.items() 96 | ] 97 | 98 | series_colors = self._map_series_to_colours( 99 | table_defn["branches"], explore 100 | ) 101 | # determine metric groups 102 | metric_groups = {} 103 | for summary in table_defn.get("summaries", []): 104 | for metric_group in summary.get("metric_groups", []): 105 | if metric_group not in metric_groups: 106 | metric_groups[metric_group] = [summary["metric"]] 107 | elif summary["metric"] not in metric_groups[metric_group]: 108 | metric_groups[metric_group].append(summary["metric"]) 109 | 110 | seen_metric_groups = [] 111 | for summary in table_defn.get("summaries", []): 112 | summary_metric_groups = summary.get("metric_groups", []) 113 | if len(summary_metric_groups) == 0: 114 | # append a dummy entry if no metric group defined 115 | summary_metric_groups.append(None) 116 | 117 | for metric_group in summary_metric_groups: 118 | if (metric_group, summary["statistic"]) in seen_metric_groups: 119 | continue 120 | 121 | if self.compact_visualization: 122 | title = "Metric" 123 | else: 124 | if metric_group is None: 125 | title = lookml_utils.slug_to_title(summary["metric"]) 126 | else: 127 | title = lookml_utils.slug_to_title(metric_group) 128 | 129 | if not self.group_by_dimension: 130 | kwargs["elements"].append( 131 | { 132 | "title": title, 133 | "metric": ( 134 | summary["metric"] 135 | if metric_group is None 136 | else ", ".join( 137 | f'"{m}"' 138 | for m in metric_groups[metric_group] 139 | ) 140 | ), 141 | "statistic": summary["statistic"], 142 | "explore": explore, 143 | "series_colors": series_colors, 144 | "xaxis": self.xaxis, 145 | "row": int(graph_index / 2) * 10, 146 | "col": 0 if graph_index % 2 == 0 else 12, 147 | "is_metric_group": metric_group is not None, 148 | } 149 | ) 150 | if metric_group is not None: 151 | seen_metric_groups.append( 152 | (metric_group, summary["statistic"]) 153 | ) 154 | graph_index += 1 155 | 156 | if self.group_by_dimension: 157 | kwargs["elements"].append( 158 | { 159 | "title": f"{title} - By {self.group_by_dimension}", 160 | "metric": ( 161 | summary["metric"] 162 | if metric_group is None 163 | else ", ".join( 164 | f'"{m}"' 165 | for m in metric_groups[metric_group] 166 | ) 167 | ), 168 | "statistic": summary["statistic"], 169 | "explore": explore, 170 | "series_colors": series_colors, 171 | "xaxis": self.xaxis, 172 | "row": int(graph_index / 2) * 10, 173 | "col": 0 if graph_index % 2 == 0 else 12, 174 | "is_metric_group": metric_group is not None, 175 | } 176 | ) 177 | graph_index += 1 178 | 179 | if self.compact_visualization: 180 | # compact visualization only needs a single tile for all probes 181 | break 182 
| 183 | if self.compact_visualization: 184 | # compact visualization only needs a single tile for all probes 185 | break 186 | 187 | if "alerts" in kwargs and kwargs["alerts"] is not None: 188 | kwargs["alerts"]["row"] = int(graph_index / 2) * 10 189 | 190 | dash_lookml = lookml_utils.render_template( 191 | "dashboard.lkml", "dashboards", **kwargs 192 | ) 193 | return dash_lookml 194 | -------------------------------------------------------------------------------- /generator/dashboards/templates/dashboard.lkml: -------------------------------------------------------------------------------- 1 | - dashboard: {{name}} 2 | title: {{title}} 3 | layout: {{layout}} 4 | preferred_viewer: dashboards-next 5 | 6 | elements: 7 | {% for element in elements -%} 8 | - title: {{element.title}} 9 | name: {{element.title}}_{{element.statistic}} 10 | {% if not compact_visualization -%} 11 | note_state: expanded 12 | note_display: above 13 | note_text: {{element.statistic.title()}} 14 | {% endif -%} 15 | explore: {{element.explore}} 16 | {% if element.statistic == "percentile" -%} 17 | type: "ci-line-chart" 18 | {% else -%} 19 | type: looker_line 20 | {% endif -%} 21 | fields: [ 22 | {{element.explore}}.{{element.xaxis}}, 23 | {{element.explore}}.branch, 24 | {% if element.statistic == "percentile" -%} 25 | {{element.explore}}.upper, 26 | {{element.explore}}.lower, 27 | {% endif -%} 28 | {{element.explore}}.point 29 | ] 30 | pivots: [ 31 | {{element.explore}}.branch 32 | {%- if group_by_dimension and element.title.endswith(group_by_dimension) %}, {{element.explore}}.{{group_by_dimension}}{% endif %} 33 | {%- if element.is_metric_group %}, {{element.explore}}.metric{% endif %} 34 | ] 35 | {% if not compact_visualization -%} 36 | filters: 37 | {{element.explore}}.metric: '{{element.metric}}' 38 | {{element.explore}}.statistic: {{element.statistic}} 39 | {% endif -%} 40 | row: {{element.row}} 41 | col: {{element.col}} 42 | width: 12 43 | height: 8 44 | field_x: {{element.explore}}.{{element.xaxis}} 45 | field_y: {{element.explore}}.point 46 | log_scale: false 47 | ci_lower: {{element.explore}}.lower 48 | ci_upper: {{element.explore}}.upper 49 | show_grid: true 50 | listen: 51 | Date: {{element.explore}}.{{element.xaxis}} 52 | {%- if element.statistic == "percentile" %} 53 | Percentile: {{element.explore}}.parameter 54 | {%- endif %} 55 | {%- for dimension in dimensions %} 56 | {{dimension.title}}: {{element.explore}}.{{dimension.name}} 57 | {%- endfor %} 58 | {% if compact_visualization -%} 59 | Metric: {{element.explore}}.metric 60 | Statistic: {{element.explore}}.statistic 61 | {% endif -%} 62 | {%- for branch, color in element.series_colors.items() %} 63 | {{ branch }}: "{{ color }}" 64 | {%- endfor %} 65 | defaults_version: 0 66 | {% endfor -%} 67 | {% if alerts is not none %} 68 | - title: Alerts 69 | name: Alerts 70 | model: operational_monitoring 71 | explore: {{alerts.explore}} 72 | type: looker_grid 73 | fields: [{{alerts.explore}}.{{elements[0].xaxis}}, 74 | {%- for dimension in dimensions %} 75 | {{alerts.explore}}.{{dimension.name}}, 76 | {%- endfor %} 77 | {{alerts.explore}}.metric, {{alerts.explore}}.statistic, {{alerts.explore}}.parameter, 78 | {{alerts.explore}}.message, {{alerts.explore}}.branch, {{alerts.explore}}.errors] 79 | sorts: [{{alerts.explore}}.submission_date 80 | desc] 81 | limit: 500 82 | show_view_names: false 83 | show_row_numbers: true 84 | transpose: false 85 | truncate_text: true 86 | hide_totals: false 87 | hide_row_totals: false 88 | size_to_fit: true 89 | table_theme: 
white 90 | limit_displayed_rows: false 91 | enable_conditional_formatting: false 92 | header_text_alignment: left 93 | header_font_size: 12 94 | rows_font_size: 12 95 | conditional_formatting_include_totals: false 96 | conditional_formatting_include_nulls: false 97 | x_axis_gridlines: false 98 | y_axis_gridlines: true 99 | show_y_axis_labels: true 100 | show_y_axis_ticks: true 101 | y_axis_tick_density: default 102 | y_axis_tick_density_custom: 5 103 | show_x_axis_label: true 104 | show_x_axis_ticks: true 105 | y_axis_scale_mode: linear 106 | x_axis_reversed: false 107 | y_axis_reversed: false 108 | plot_size_by_field: false 109 | trellis: '' 110 | stacking: '' 111 | legend_position: center 112 | point_style: none 113 | show_value_labels: false 114 | label_density: 25 115 | x_axis_scale: auto 116 | y_axis_combined: true 117 | show_null_points: true 118 | interpolation: linear 119 | defaults_version: 1 120 | series_types: {} 121 | listen: 122 | Date: {{alerts.explore}}.{{alerts.date}} 123 | row: {{ alerts.row }} 124 | col: {{ alerts.col }} 125 | width: 24 126 | height: 6 127 | {% endif %} 128 | filters: 129 | - name: Date 130 | title: Date 131 | type: field_filter 132 | allow_multiple_values: true 133 | required: false 134 | ui_config: 135 | type: advanced 136 | display: popover 137 | model: operational_monitoring 138 | explore: {{elements[0].explore}} 139 | listens_to_filters: [] 140 | field: {{elements[0].explore}}.{{elements[0].xaxis}} 141 | 142 | - name: Percentile 143 | title: Percentile 144 | type: field_filter 145 | default_value: '50' 146 | allow_multiple_values: false 147 | required: true 148 | ui_config: 149 | type: advanced 150 | display: popover 151 | model: operational_monitoring 152 | explore: {{ elements[0].explore }} 153 | listens_to_filters: [] 154 | field: {{ elements[0].explore }}.parameter 155 | {% if compact_visualization -%} 156 | - name: Metric 157 | title: Metric 158 | type: field_filter 159 | default_value: '{{ elements[0].metric }}' 160 | allow_multiple_values: false 161 | required: true 162 | ui_config: 163 | type: dropdown_menu 164 | display: popover 165 | model: operational_monitoring 166 | explore: {{ elements[0].explore }} 167 | listens_to_filters: [] 168 | field: {{ elements[0].explore }}.metric 169 | - name: Statistic 170 | title: Statistic 171 | type: field_filter 172 | default_value: '{{ elements[0].statistic }}' 173 | allow_multiple_values: false 174 | required: true 175 | ui_config: 176 | type: dropdown_menu 177 | display: popover 178 | model: operational_monitoring 179 | explore: {{ elements[0].explore }} 180 | listens_to_filters: [Metric] 181 | field: {{ elements[0].explore }}.statistic 182 | {% endif -%} 183 | 184 | {% for dimension in dimensions -%} 185 | {% if dimension.name != group_by_dimension %} 186 | - title: {{dimension.title}} 187 | name: {{dimension.title}} 188 | type: string_filter 189 | default_value: '{{dimension.default}}' 190 | allow_multiple_values: false 191 | required: true 192 | ui_config: 193 | type: dropdown_menu 194 | display: inline 195 | options: 196 | {% for option in dimension.options -%} 197 | - '{{option}}' 198 | {% endfor %} 199 | {% else %} 200 | - title: {{dimension.title}} 201 | name: {{dimension.title}} 202 | type: string_filter 203 | default_value: '{% for option in dimension.options | sort -%}{{option}}{% if not loop.last %},{% endif %}{% endfor %}' 204 | allow_multiple_values: true 205 | required: true 206 | ui_config: 207 | type: advanced 208 | display: inline 209 | options: 210 | {% for option in dimension.options 
| sort -%} 211 | - '{{option}}' 212 | {% endfor %} 213 | {% endif %} 214 | {% endfor -%} 215 | -------------------------------------------------------------------------------- /generator/dryrun.py: -------------------------------------------------------------------------------- 1 | """Dry Run method to get BigQuery metadata.""" 2 | 3 | import json 4 | from enum import Enum 5 | from functools import cached_property 6 | from typing import Optional 7 | from urllib.request import Request, urlopen 8 | 9 | import google.auth 10 | from google.auth.transport.requests import Request as GoogleAuthRequest 11 | from google.cloud import bigquery 12 | from google.oauth2.id_token import fetch_id_token 13 | 14 | DRY_RUN_URL = ( 15 | "https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/bigquery-etl-dryrun" 16 | ) 17 | 18 | 19 | def credentials(auth_req: Optional[GoogleAuthRequest] = None): 20 | """Get GCP credentials.""" 21 | auth_req = auth_req or GoogleAuthRequest() 22 | creds, _ = google.auth.default( 23 | scopes=["https://www.googleapis.com/auth/cloud-platform"] 24 | ) 25 | creds.refresh(auth_req) 26 | return creds 27 | 28 | 29 | def id_token(): 30 | """Get token to authenticate against Cloud Function.""" 31 | auth_req = GoogleAuthRequest() 32 | creds = credentials(auth_req) 33 | 34 | if hasattr(creds, "id_token"): 35 | # Get token from default credentials for the current environment created via Cloud SDK run 36 | id_token = creds.id_token 37 | else: 38 | # If the environment variable GOOGLE_APPLICATION_CREDENTIALS is set to service account JSON file, 39 | # then ID token is acquired using this service account credentials. 40 | id_token = fetch_id_token(auth_req, DRY_RUN_URL) 41 | return id_token 42 | 43 | 44 | class DryRunError(Exception): 45 | """Exception raised on dry run errors.""" 46 | 47 | def __init__(self, message, error, use_cloud_function, table_id): 48 | """Initialize DryRunError.""" 49 | super().__init__(message) 50 | self.error = error 51 | self.use_cloud_function = use_cloud_function 52 | self.table_id = table_id 53 | 54 | def __reduce__(self): 55 | """ 56 | Override to ensure that all parameters are being passed when pickling. 57 | 58 | Pickling happens when passing exception between processes (e.g. 
via multiprocessing) 59 | """ 60 | return ( 61 | self.__class__, 62 | self.args + (self.error, self.use_cloud_function, self.table_id), 63 | ) 64 | 65 | 66 | class Errors(Enum): 67 | """DryRun errors that require special handling.""" 68 | 69 | READ_ONLY = 1 70 | DATE_FILTER_NEEDED = 2 71 | DATE_FILTER_NEEDED_AND_SYNTAX = 3 72 | PERMISSION_DENIED = 4 73 | 74 | 75 | class DryRunContext: 76 | """DryRun builder class.""" 77 | 78 | def __init__( 79 | self, 80 | use_cloud_function=False, 81 | id_token=None, 82 | credentials=None, 83 | dry_run_url=DRY_RUN_URL, 84 | ): 85 | """Initialize dry run instance.""" 86 | self.use_cloud_function = use_cloud_function 87 | self.dry_run_url = dry_run_url 88 | self.id_token = id_token 89 | self.credentials = credentials 90 | 91 | def create( 92 | self, 93 | sql=None, 94 | project="moz-fx-data-shared-prod", 95 | dataset=None, 96 | table=None, 97 | ): 98 | """Initialize a DryRun instance.""" 99 | return DryRun( 100 | use_cloud_function=self.use_cloud_function, 101 | id_token=self.id_token, 102 | credentials=self.credentials, 103 | sql=sql, 104 | project=project, 105 | dataset=dataset, 106 | table=table, 107 | dry_run_url=self.dry_run_url, 108 | ) 109 | 110 | 111 | class DryRun: 112 | """Dry run SQL.""" 113 | 114 | def __init__( 115 | self, 116 | use_cloud_function=False, 117 | id_token=None, 118 | credentials=None, 119 | sql=None, 120 | project="moz-fx-data-shared-prod", 121 | dataset=None, 122 | table=None, 123 | dry_run_url=DRY_RUN_URL, 124 | ): 125 | """Initialize dry run instance.""" 126 | self.sql = sql 127 | self.use_cloud_function = use_cloud_function 128 | self.project = project 129 | self.dataset = dataset 130 | self.table = table 131 | self.dry_run_url = dry_run_url 132 | self.id_token = id_token 133 | self.credentials = credentials 134 | 135 | @cached_property 136 | def client(self): 137 | """Get BigQuery client instance.""" 138 | return bigquery.Client(credentials=self.credentials) 139 | 140 | @cached_property 141 | def dry_run_result(self): 142 | """Return the dry run result.""" 143 | try: 144 | if self.use_cloud_function: 145 | json_data = { 146 | "query": self.sql or "SELECT 1", 147 | "project": self.project, 148 | "dataset": self.dataset or "telemetry", 149 | } 150 | 151 | if self.table: 152 | json_data["table"] = self.table 153 | 154 | r = urlopen( 155 | Request( 156 | self.dry_run_url, 157 | headers={ 158 | "Content-Type": "application/json", 159 | "Authorization": f"Bearer {self.id_token}", 160 | }, 161 | data=json.dumps(json_data).encode("utf8"), 162 | method="POST", 163 | ) 164 | ) 165 | return json.load(r) 166 | else: 167 | query_schema = None 168 | referenced_tables = [] 169 | table_metadata = None 170 | 171 | if self.sql: 172 | job_config = bigquery.QueryJobConfig( 173 | dry_run=True, 174 | use_query_cache=False, 175 | query_parameters=[ 176 | bigquery.ScalarQueryParameter( 177 | "submission_date", "DATE", "2019-01-01" 178 | ) 179 | ], 180 | ) 181 | 182 | if self.project: 183 | job_config.connection_properties = [ 184 | bigquery.ConnectionProperty( 185 | "dataset_project_id", self.project 186 | ) 187 | ] 188 | 189 | job = self.client.query(self.sql, job_config=job_config) 190 | query_schema = ( 191 | job._properties.get("statistics", {}) 192 | .get("query", {}) 193 | .get("schema", {}) 194 | ) 195 | referenced_tables = [ 196 | ref.to_api_repr() for ref in job.referenced_tables 197 | ] 198 | 199 | if ( 200 | self.project is not None 201 | and self.table is not None 202 | and self.dataset is not None 203 | ): 204 | table = 
self.client.get_table( 205 | f"{self.project}.{self.dataset}.{self.table}" 206 | ) 207 | table_metadata = { 208 | "tableType": table.table_type, 209 | "friendlyName": table.friendly_name, 210 | "schema": { 211 | "fields": [field.to_api_repr() for field in table.schema] 212 | }, 213 | } 214 | 215 | return { 216 | "valid": True, 217 | "referencedTables": referenced_tables, 218 | "schema": query_schema, 219 | "tableMetadata": table_metadata, 220 | } 221 | except Exception as e: 222 | print(f"ERROR {e}") 223 | return None 224 | 225 | def get_schema(self): 226 | """Return the query schema by dry running the SQL file.""" 227 | self.validate() 228 | 229 | if ( 230 | self.dry_run_result 231 | and self.dry_run_result["valid"] 232 | and "schema" in self.dry_run_result 233 | ): 234 | return self.dry_run_result["schema"]["fields"] 235 | 236 | return [] 237 | 238 | def get_table_schema(self): 239 | """Return the schema of the provided table.""" 240 | self.validate() 241 | 242 | if ( 243 | self.dry_run_result 244 | and self.dry_run_result["valid"] 245 | and "tableMetadata" in self.dry_run_result 246 | ): 247 | return self.dry_run_result["tableMetadata"]["schema"]["fields"] 248 | 249 | return [] 250 | 251 | def get_table_metadata(self): 252 | """Return table metadata.""" 253 | self.validate() 254 | 255 | if ( 256 | self.dry_run_result 257 | and self.dry_run_result["valid"] 258 | and "tableMetadata" in self.dry_run_result 259 | ): 260 | return self.dry_run_result["tableMetadata"] 261 | 262 | return {} 263 | 264 | def validate(self): 265 | """Dry run the provided SQL file and check if valid.""" 266 | dry_run_error = DryRunError( 267 | "Error when dry running SQL", 268 | self.get_error(), 269 | self.use_cloud_function, 270 | self.table, 271 | ) 272 | 273 | if self.dry_run_result is None: 274 | raise dry_run_error 275 | 276 | if self.dry_run_result["valid"]: 277 | return True 278 | elif self.get_error() == Errors.READ_ONLY: 279 | # We want the dryrun service to only have read permissions, so 280 | # we expect CREATE VIEW and CREATE TABLE to throw specific 281 | # exceptions. 282 | return True 283 | elif self.get_error() == Errors.DATE_FILTER_NEEDED: 284 | # With strip_dml flag, some queries require a partition filter 285 | # (submission_date, submission_timestamp, etc.) 
to run 286 | return True 287 | else: 288 | print("ERROR\n", self.dry_run_result["errors"]) 289 | raise dry_run_error 290 | 291 | def errors(self): 292 | """Dry run the provided SQL file and return errors.""" 293 | if self.dry_run_result is None: 294 | return [] 295 | return self.dry_run_result.get("errors", []) 296 | 297 | def get_error(self) -> Optional[Errors]: 298 | """Get specific errors for edge case handling.""" 299 | errors = self.errors() 300 | if len(errors) != 1: 301 | return None 302 | 303 | error = errors[0] 304 | if error and error.get("code") in [400, 403]: 305 | error_message = error.get("message", "") 306 | if ( 307 | "does not have bigquery.tables.create permission for dataset" 308 | in error_message 309 | or "Permission bigquery.tables.create denied" in error_message 310 | or "Permission bigquery.datasets.update denied" in error_message 311 | ): 312 | return Errors.READ_ONLY 313 | if "without a filter over column(s)" in error_message: 314 | return Errors.DATE_FILTER_NEEDED 315 | if ( 316 | "Syntax error: Expected end of input but got keyword WHERE" 317 | in error_message 318 | ): 319 | return Errors.DATE_FILTER_NEEDED_AND_SYNTAX 320 | if ( 321 | "Permission bigquery.tables.get denied on table" in error_message 322 | or "User does not have permission to query table" in error_message 323 | ): 324 | return Errors.PERMISSION_DENIED 325 | return None 326 | -------------------------------------------------------------------------------- /generator/explores/__init__.py: -------------------------------------------------------------------------------- 1 | """All possible explore types.""" 2 | 3 | from .explore import Explore # noqa: F401 isort:skip 4 | from .client_counts_explore import ClientCountsExplore 5 | from .events_explore import EventsExplore 6 | from .funnel_analysis_explore import FunnelAnalysisExplore 7 | from .glean_ping_explore import GleanPingExplore 8 | from .growth_accounting_explore import GrowthAccountingExplore 9 | from .metric_definitions_explore import MetricDefinitionsExplore 10 | from .operational_monitoring_explore import ( 11 | OperationalMonitoringAlertingExplore, 12 | OperationalMonitoringExplore, 13 | ) 14 | from .ping_explore import PingExplore 15 | from .table_explore import TableExplore 16 | 17 | EXPLORE_TYPES = { 18 | ClientCountsExplore.type: ClientCountsExplore, 19 | EventsExplore.type: EventsExplore, 20 | FunnelAnalysisExplore.type: FunnelAnalysisExplore, 21 | GleanPingExplore.type: GleanPingExplore, 22 | PingExplore.type: PingExplore, 23 | GrowthAccountingExplore.type: GrowthAccountingExplore, 24 | MetricDefinitionsExplore.type: MetricDefinitionsExplore, 25 | OperationalMonitoringExplore.type: OperationalMonitoringExplore, 26 | OperationalMonitoringAlertingExplore.type: OperationalMonitoringAlertingExplore, 27 | TableExplore.type: TableExplore, 28 | } 29 | -------------------------------------------------------------------------------- /generator/explores/client_counts_explore.py: -------------------------------------------------------------------------------- 1 | """Client Counts explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class ClientCountsExplore(Explore): 13 | """A Client Counts Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "client_counts_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | queries = [] 20 | if time_partitioning_group := self.get_view_time_partitioning_group( 21 | self.views["extended_view"] 22 | ): 23 | date_dimension = f"{time_partitioning_group}_date" 24 | queries.append( 25 | { 26 | "description": "Client Counts of weekly cohorts over the past N days.", 27 | "dimensions": ["days_since_first_seen", "first_seen_week"], 28 | "measures": ["client_count"], 29 | "pivots": ["first_seen_week"], 30 | "filters": [ 31 | {date_dimension: "8 weeks"}, 32 | {"first_seen_date": "8 weeks"}, 33 | {"have_completed_period": "yes"}, 34 | ], 35 | "sorts": [{"days_since_first_seen": "asc"}], 36 | "name": "cohort_analysis", 37 | } 38 | ) 39 | if self.has_view_dimension(self.views["extended_view"], "app_build"): 40 | queries.append( 41 | { 42 | "description": "Number of clients per build.", 43 | "dimensions": [date_dimension, "app_build"], 44 | "measures": ["client_count"], 45 | "pivots": ["app_build"], 46 | "sorts": [{date_dimension: "asc"}], 47 | "name": "build_breakdown", 48 | } 49 | ) 50 | 51 | explore_lookml = { 52 | "name": self.name, 53 | "view_name": self.views["base_view"], 54 | "description": "Client counts across dimensions and cohorts.", 55 | "always_filter": { 56 | "filters": self.get_required_filters("extended_view"), 57 | }, 58 | "queries": queries, 59 | "joins": self.get_unnested_fields_joins_lookml(), 60 | } 61 | 62 | if datagroup := self.get_datagroup(): 63 | explore_lookml["persist_with"] = datagroup 64 | 65 | return [explore_lookml] 66 | 67 | @staticmethod 68 | def from_views(views: List[View]) -> Iterator[ClientCountsExplore]: 69 | """ 70 | If possible, generate a Client Counts explore for this namespace. 71 | 72 | Client counts explores are only created for client_counts views. 
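        Illustrative example (mirroring the code below): a namespace whose
        views include one named `client_counts` yields

            ClientCountsExplore(
                "client_counts",
                {
                    "base_view": "client_counts",
                    "extended_view": "baseline_clients_daily_table",
                },
            )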
73 | """ 74 | for view in views: 75 | if view.name == "client_counts": 76 | yield ClientCountsExplore( 77 | view.name, 78 | { 79 | "base_view": "client_counts", 80 | "extended_view": "baseline_clients_daily_table", 81 | }, 82 | ) 83 | 84 | @staticmethod 85 | def from_dict(name: str, defn: dict, views_path: Path) -> ClientCountsExplore: 86 | """Get an instance of this explore from a dictionary definition.""" 87 | return ClientCountsExplore(name, defn["views"], views_path) 88 | -------------------------------------------------------------------------------- /generator/explores/events_explore.py: -------------------------------------------------------------------------------- 1 | """An explore for Events Views.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import EventsView, View 9 | from .explore import Explore 10 | 11 | 12 | class EventsExplore(Explore): 13 | """An Events Explore, from any unnested events table.""" 14 | 15 | type: str = "events_explore" 16 | 17 | @staticmethod 18 | def from_views(views: List[View]) -> Iterator[EventsExplore]: 19 | """Where possible, generate EventsExplores for Views.""" 20 | for view in views: 21 | if isinstance(view, EventsView): 22 | yield EventsExplore( 23 | view.name, 24 | { 25 | "base_view": "events", 26 | "extended_view": view.tables[0]["events_table_view"], 27 | }, 28 | ) 29 | 30 | @staticmethod 31 | def from_dict(name: str, defn: dict, views_path: Path) -> EventsExplore: 32 | """Get an instance of this explore from a dictionary definition.""" 33 | return EventsExplore(name, defn["views"], views_path) 34 | 35 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 36 | name = self.name 37 | if not name.endswith("_counts"): 38 | name = "event_counts" 39 | 40 | lookml: Dict[str, Any] = { 41 | "name": name, 42 | "view_name": self.views["base_view"], 43 | "description": "Event counts over time.", 44 | "joins": self.get_unnested_fields_joins_lookml(), 45 | } 46 | if required_filters := self.get_required_filters("extended_view"): 47 | lookml["always_filter"] = {"filters": required_filters} 48 | if time_partitioning_group := self.get_view_time_partitioning_group( 49 | self.views["extended_view"] 50 | ): 51 | date_dimension = f"{time_partitioning_group}_date" 52 | lookml["queries"] = [ 53 | { 54 | "description": "Event counts from all events over the past two weeks.", 55 | "dimensions": [date_dimension], 56 | "measures": ["event_count"], 57 | "filters": [ 58 | {date_dimension: "14 days"}, 59 | ], 60 | "name": "all_event_counts", 61 | }, 62 | ] 63 | 64 | if datagroup := self.get_datagroup(): 65 | lookml["persist_with"] = datagroup 66 | 67 | return [lookml] 68 | -------------------------------------------------------------------------------- /generator/explores/explore.py: -------------------------------------------------------------------------------- 1 | """Generic explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from pathlib import Path 7 | from typing import Any, Dict, List, Optional, Tuple 8 | 9 | import lkml 10 | 11 | from ..views.lookml_utils import escape_filter_expr, slug_to_title 12 | 13 | 14 | @dataclass 15 | class Explore: 16 | """A generic explore.""" 17 | 18 | name: str 19 | views: Dict[str, str] 20 | views_path: Optional[Path] = None 21 | defn: Optional[Dict[str, str]] = None 22 | type: str = field(init=False) 23 | 24 | def to_dict(self) -> dict: 25 | 
"""Explore instance represented as a dict.""" 26 | return {self.name: {"type": self.type, "views": self.views}} 27 | 28 | def to_lookml( 29 | self, v1_name: Optional[str], hidden: Optional[bool] 30 | ) -> List[Dict[str, Any]]: 31 | """ 32 | Generate LookML for this explore. 33 | 34 | Any generation done in dependent explore's 35 | `_to_lookml` takes precedence over these fields. 36 | """ 37 | base_lookml = {} 38 | if hidden: 39 | base_lookml["hidden"] = "yes" 40 | base_view_name = next( 41 | ( 42 | view_name 43 | for view_type, view_name in self.views.items() 44 | if view_type == "base_view" 45 | ) 46 | ) 47 | for view_type, view in self.views.items(): 48 | # We look at our dependent views to see if they have a 49 | # "submission" field. Dependent views are any that are: 50 | # - base_view 51 | # - extended_view* 52 | # 53 | # We do not want to look at joined views. Those should be 54 | # labeled as: 55 | # - join* 56 | # 57 | # If they have a submission field, we filter on the date. 58 | # This allows for filter queries to succeed. 59 | if "join" in view_type: 60 | continue 61 | if time_partitioning_group := self.get_view_time_partitioning_group(view): 62 | base_lookml["sql_always_where"] = ( 63 | f"${{{base_view_name}.{time_partitioning_group}_date}} >= '2010-01-01'" 64 | ) 65 | 66 | # We only update the first returned explore 67 | new_lookml = self._to_lookml(v1_name) 68 | base_lookml.update(new_lookml[0]) 69 | new_lookml[0] = base_lookml 70 | 71 | return new_lookml 72 | 73 | def _to_lookml( 74 | self, 75 | v1_name: Optional[str], 76 | ) -> List[Dict[str, Any]]: 77 | raise NotImplementedError("Only implemented in subclasses") 78 | 79 | def get_dependent_views(self) -> List[str]: 80 | """Get views this explore is dependent on.""" 81 | dependent_views = [] 82 | for _type, views in self.views.items(): 83 | if _type.startswith("extended"): 84 | continue 85 | elif _type.startswith("joined"): 86 | dependent_views += [view for view in views] 87 | else: 88 | dependent_views.append(views) 89 | return dependent_views 90 | 91 | @staticmethod 92 | def from_dict(name: str, defn: dict, views_path: Path) -> Explore: 93 | """Get an instance of an explore from a namespace definition.""" 94 | raise NotImplementedError("Only implemented in subclasses") 95 | 96 | def get_view_lookml(self, view: str) -> dict: 97 | """Get the LookML for a view.""" 98 | if self.views_path is not None: 99 | return lkml.load((self.views_path / f"{view}.view.lkml").read_text()) 100 | 101 | raise Exception("Missing view path for get_view_lookml") 102 | 103 | def get_datagroup(self) -> Optional[str]: 104 | """ 105 | Return the name of the associated datagroup. 106 | 107 | Return `None` if there is no datagroup for this explore. 
108 | """ 109 | if self.views_path and (self.views_path.parent / "datagroups").exists(): 110 | datagroups_path = self.views_path.parent / "datagroups" 111 | datagroup_file = ( 112 | datagroups_path 113 | / f'{self.views["base_view"]}_last_updated.datagroup.lkml' 114 | ) 115 | if datagroup_file.exists(): 116 | return f'{self.views["base_view"]}_last_updated' 117 | return None 118 | 119 | def get_unnested_fields_joins_lookml( 120 | self, 121 | ) -> list: 122 | """Get the LookML for joining unnested fields.""" 123 | views_lookml = self.get_view_lookml(self.views["base_view"]) 124 | views: List[str] = [view["name"] for view in views_lookml["views"]] 125 | parent_base_name = views_lookml["views"][0]["name"] 126 | 127 | extended_views: List[str] = [] 128 | if "extended_view" in self.views: 129 | # check for extended views 130 | extended_views_lookml = self.get_view_lookml(self.views["extended_view"]) 131 | extended_views = [view["name"] for view in extended_views_lookml["views"]] 132 | 133 | views_lookml.update(extended_views_lookml) 134 | views += extended_views 135 | 136 | joins = [] 137 | for view in views_lookml["views"][1:]: 138 | view_name = view["name"] 139 | # get repeated, nested fields that exist as separate views in lookml 140 | base_name, metric = self._get_base_name_and_metric( 141 | view_name=view_name, views=views 142 | ) 143 | metric_name = view_name 144 | metric_label = slug_to_title(metric_name) 145 | 146 | if view_name in extended_views: 147 | # names of extended views are overridden by the name of the view that is extending them 148 | metric_label = slug_to_title( 149 | metric_name.replace(base_name, parent_base_name) 150 | ) 151 | base_name = parent_base_name 152 | 153 | joins.append( 154 | { 155 | "name": view_name, 156 | "view_label": metric_label, 157 | "relationship": "one_to_many", 158 | "sql": ( 159 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {metric_name} " 160 | ), 161 | } 162 | ) 163 | 164 | return joins 165 | 166 | def _get_default_channel(self, view: str) -> Optional[str]: 167 | channel_params = [ 168 | param 169 | for _view_defn in self.get_view_lookml(view)["views"] 170 | for param in _view_defn.get("filters", []) 171 | if _view_defn["name"] == view and param["name"] == "channel" 172 | ] 173 | 174 | if channel_params: 175 | allowed_values = channel_params[0]["suggestions"] 176 | default_value = allowed_values[0] 177 | return escape_filter_expr(default_value) 178 | return None 179 | 180 | def _get_base_name_and_metric( 181 | self, view_name: str, views: List[str] 182 | ) -> Tuple[str, str]: 183 | """ 184 | Get base view and metric names. 185 | 186 | Returns the name of the base view and the metric based on the 187 | passed `view_name` and existing views. 188 | 189 | The names are resolved in a backwards fashion to account for 190 | repeated nested fields that might contain other nested fields. 191 | For example: 192 | 193 | view: sync { 194 | [...] 195 | dimension: payload__events { 196 | sql: ${TABLE}.payload.events ;; 197 | } 198 | } 199 | 200 | view: sync__payload__events { 201 | [...] 202 | dimension: f5_ { 203 | sql: ${TABLE}.f5_ ;; 204 | } 205 | } 206 | 207 | view: sync__payload__events__f5_ { 208 | [...] 209 | } 210 | 211 | For these nested views to get translated to the following joins, the names 212 | need to be resolved backwards: 213 | 214 | join: sync__payload__events { 215 | relationship: one_to_many 216 | sql: LEFT JOIN UNNEST(${sync.payload__events}) AS sync__payload__events ;; 217 | } 218 | 219 | join: sync__payload__events__f5_ { 220 | relationship: one_to_many 221 | sql: LEFT JOIN UNNEST(${sync__payload__events.f5_}) AS sync__payload__events__f5_ ;; 222 | } 223 | """ 224 | split = view_name.split("__") 225 | for index in range(len(split) - 1, 0, -1): 226 | base_view = "__".join(split[:index]) 227 | metric = "__".join(split[index:]) 228 | if base_view in views: 229 | return (base_view, metric) 230 | raise Exception(f"Cannot get base name and metric from view {view_name}")
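# Annotation (not part of the upstream source): a worked example of the
# backwards resolution implemented above, using the names from the docstring.
#
#   views = ["sync", "sync__payload__events", "sync__payload__events__f5_"]
#   _get_base_name_and_metric("sync__payload__events__f5_", views)
#   # first split tried: base "sync__payload__events", metric "f5_";
#   # the base is in `views`, so ("sync__payload__events", "f5_") is returned
#   _get_base_name_and_metric("sync__payload__events", views)
#   # "sync__payload" is not in `views`, so resolution continues and
#   # ("sync", "payload__events") is returned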
231 | 232 | def has_view_dimension(self, view: str, dimension_name: str) -> bool: 233 | """Determine whether this view has the given dimension.""" 234 | for _view_defn in self.get_view_lookml(view)["views"]: 235 | if _view_defn["name"] != view: 236 | continue 237 | for dim in _view_defn.get("dimensions", []): 238 | if dim["name"] == dimension_name: 239 | return True 240 | return False 241 | 242 | def get_view_time_partitioning_group(self, view: str) -> Optional[str]: 243 | """Get the time partitioning dimension group for this view. 244 | 245 | Return the name of the first dimension group tagged "time_partitioning_field", 246 | and fall back to "submission" if available. 247 | """ 248 | has_submission = False 249 | for _view_defn in self.get_view_lookml(view)["views"]: 250 | if not _view_defn["name"] == view: 251 | continue 252 | for dim in _view_defn.get("dimension_groups", []): 253 | if "time_partitioning_field" in dim.get("tags", []): 254 | return dim["name"] 255 | elif dim["name"] == "submission": 256 | has_submission = True 257 | if has_submission: 258 | return "submission" 259 | return None 260 | 261 | def get_required_filters(self, view_name: str) -> List[Dict[str, str]]: 262 | """Get required filters for this view.""" 263 | filters = [] 264 | view = self.views[view_name] 265 | 266 | # Add a default filter on channel, if it's present in the view 267 | default_channel = self._get_default_channel(view) 268 | if default_channel is not None: 269 | filters.append({"channel": default_channel}) 270 | 271 | # Add submission filter, if present in the view 272 | if time_partitioning_group := self.get_view_time_partitioning_group(view): 273 | filters.append({f"{time_partitioning_group}_date": "28 days"}) 274 | 275 | return filters 276 | 277 | def __eq__(self, other) -> bool: 278 | """Check for equality with another Explore.""" 279 | 280 | def comparable_dict(d): 281 | return tuple(sorted(d.items())) 282 | 283 | if isinstance(other, Explore): 284 | return ( 285 | self.name == other.name 286 | and comparable_dict(self.views) == comparable_dict(other.views) 287 | and self.type == other.type 288 | ) 289 | return False 290 | -------------------------------------------------------------------------------- /generator/explores/funnel_analysis_explore.py: -------------------------------------------------------------------------------- 1 | """Funnel Analysis explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class FunnelAnalysisExplore(Explore): 13 | """A Funnel Analysis Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "funnel_analysis_explore" 16 | n_funnel_steps: int = 4 17 | 18 | @staticmethod 19 | def from_views(views: List[View]) -> Iterator[FunnelAnalysisExplore]: 20 | """ 21 | If possible, generate a Funnel Analysis explore for this namespace. 22 | 23 | Funnel analysis explores are only created for funnel_analysis views. 24 | """ 25 | for view in views: 26 | if view.name == "funnel_analysis": 27 | yield FunnelAnalysisExplore( 28 | "funnel_analysis", 29 | {"base_view": view.name}, 30 | ) 31 | 32 | @staticmethod 33 | def from_dict(name: str, defn: dict, views_path: Path) -> FunnelAnalysisExplore: 34 | """Get an instance of this explore from a dictionary definition.""" 35 | return FunnelAnalysisExplore(name, defn["views"], views_path) 36 | 37 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 38 | view_lookml = self.get_view_lookml("funnel_analysis") 39 | views = view_lookml["views"] 40 | n_events = len([d for d in views if d["name"].startswith("step_")]) 41 | 42 | explore_lookml = { 43 | "name": "funnel_analysis", 44 | "description": "Count funnel completion over time. Funnels are limited to a single day.", 45 | "view_label": " User-Day Funnels", 46 | "always_filter": { 47 | "filters": [ 48 | {"submission_date": "14 days"}, 49 | ] 50 | }, 51 | "joins": [ 52 | { 53 | "name": f"step_{n}", 54 | "relationship": "many_to_one", 55 | "type": "cross", 56 | } 57 | for n in range(1, n_events + 1) 58 | ], 59 | "sql_always_where": "${funnel_analysis.submission_date} >= '2010-01-01'", 60 | } 61 | 62 | if datagroup := self.get_datagroup(): 63 | explore_lookml["persist_with"] = datagroup 64 | 65 | defn: List[Dict[str, Any]] = [ 66 | explore_lookml, 67 | {"name": "event_names", "hidden": "yes"}, 68 | ] 69 | 70 | return defn 71 | -------------------------------------------------------------------------------- /generator/explores/glean_ping_explore.py: -------------------------------------------------------------------------------- 1 | """Glean Ping explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from mozilla_schema_generator.glean_ping import GleanPing 9 | 10 | from ..views import GleanPingView, View 11 | from .ping_explore import PingExplore 12 | 13 | 14 | class GleanPingExplore(PingExplore): 15 | """A Glean Ping Table explore.""" 16 | 17 | type: str = "glean_ping_explore" 18 | 19 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 20 | """Generate LookML to represent this explore.""" 21 | repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name)) 22 | glean_app = GleanPing(repo) 23 | # convert ping description indexes to snake case, as we already have 24 | # for the explore name 25 | ping_descriptions = { 26 | k.replace("-", "_"): v for k, v in glean_app.get_ping_descriptions().items() 27 | } 28 | # collapse whitespace in the description so the lookml looks a little better 29 | ping_description = " ".join(ping_descriptions.get(self.name, "").split()) 30 | views_lookml = self.get_view_lookml(self.views["base_view"]) 31 | 32 | # The first view, by convention, is always the base view with the 33 | # majority of the dimensions from the top level. 
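# (Annotation, names illustrative: for a Glean ping named `metrics`, the base
# view would be `metrics`, while nested views such as
# `metrics__metrics__labeled_counter__glean_error_invalid_value` are joined
# back onto it through the UNNEST joins built below.)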
34 | base = views_lookml["views"][0] 35 | base_name = base["name"] 36 | 37 | joins = [] 38 | for view in views_lookml["views"][1:]: 39 | if view["name"].startswith("suggest__"): 40 | continue 41 | view_name = view["name"] 42 | metric = "__".join(view["name"].split("__")[1:]) 43 | 44 | if "__labeled_counter__" in metric: 45 | joins.append( 46 | { 47 | "name": view_name, 48 | "relationship": "one_to_many", 49 | "sql": ( 50 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {view_name} " 51 | f"ON ${{{base_name}.document_id}} = ${{{view_name}.document_id}}" 52 | ), 53 | } 54 | ) 55 | else: 56 | if metric.startswith("metrics__"): 57 | continue 58 | 59 | try: 60 | # get repeated, nested fields that exist as separate views in lookml 61 | base_name, metric = self._get_base_name_and_metric( 62 | view_name=view_name, 63 | views=[v["name"] for v in views_lookml["views"]], 64 | ) 65 | metric_name = view_name 66 | 67 | joins.append( 68 | { 69 | "name": view_name, 70 | "relationship": "one_to_many", 71 | "sql": ( 72 | f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {metric_name} " 73 | ), 74 | } 75 | ) 76 | except Exception: 77 | # ignore nested views that cannot be joined on to the base view 78 | continue 79 | 80 | base_explore: Dict[str, Any] = { 81 | "name": self.name, 82 | # list the base explore first by prefixing with a space 83 | "view_label": f" {self.name.title()}", 84 | "description": f"Explore for the {self.name} ping. {ping_description}", 85 | "view_name": self.views["base_view"], 86 | "joins": joins, 87 | } 88 | 89 | if datagroup := self.get_datagroup(): 90 | base_explore["persist_with"] = datagroup 91 | 92 | required_filters = self.get_required_filters("base_view") 93 | if len(required_filters) > 0: 94 | base_explore["always_filter"] = {"filters": required_filters} 95 | 96 | suggests = [] 97 | for view in views_lookml["views"][1:]: 98 | if not view["name"].startswith("suggest__"): 99 | continue 100 | suggests.append({"name": view["name"], "hidden": "yes"}) 101 | 102 | return [base_explore] + suggests 103 | 104 | @staticmethod 105 | def from_views(views: List[View]) -> Iterator[PingExplore]: 106 | """Generate all possible GleanPingExplores from the views.""" 107 | for view in views: 108 | if view.view_type == GleanPingView.type: 109 | yield GleanPingExplore(view.name, {"base_view": view.name}) 110 | 111 | @staticmethod 112 | def from_dict(name: str, defn: dict, views_path: Path) -> GleanPingExplore: 113 | """Get an instance of this explore from a name and dictionary definition.""" 114 | return GleanPingExplore(name, defn["views"], views_path) 115 | -------------------------------------------------------------------------------- /generator/explores/growth_accounting_explore.py: -------------------------------------------------------------------------------- 1 | """Growth Accounting explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . 
import Explore 10 | 11 | 12 | class GrowthAccountingExplore(Explore): 13 | """A Growth Accounting Explore, from Baseline Clients Last Seen.""" 14 | 15 | type: str = "growth_accounting_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | explore_lookml = { 20 | "name": self.name, 21 | "view_name": self.views["base_view"], 22 | "joins": self.get_unnested_fields_joins_lookml(), 23 | } 24 | 25 | if datagroup := self.get_datagroup(): 26 | explore_lookml["persist_with"] = datagroup 27 | 28 | return [explore_lookml] 29 | 30 | @staticmethod 31 | def from_views(views: List[View]) -> Iterator[GrowthAccountingExplore]: 32 | """ 33 | If possible, generate a Growth Accounting explore for this namespace. 34 | 35 | Growth accounting explores are only created for growth_accounting views. 36 | """ 37 | for view in views: 38 | if view.name == "growth_accounting": 39 | yield GrowthAccountingExplore( 40 | view.name, 41 | {"base_view": "growth_accounting"}, 42 | ) 43 | 44 | @staticmethod 45 | def from_dict(name: str, defn: dict, views_path: Path) -> GrowthAccountingExplore: 46 | """Get an instance of this explore from a dictionary definition.""" 47 | return GrowthAccountingExplore(name, defn["views"], views_path) 48 | -------------------------------------------------------------------------------- /generator/explores/metric_definitions_explore.py: -------------------------------------------------------------------------------- 1 | """Metric Hub metrics explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . import Explore 10 | 11 | 12 | class MetricDefinitionsExplore(Explore): 13 | """Metric Hub Metrics Explore.""" 14 | 15 | type: str = "metric_definitions_explore" 16 | 17 | def __init__( 18 | self, 19 | name: str, 20 | views: Dict[str, str], 21 | views_path: Optional[Path] = None, 22 | defn: Optional[Dict[str, Any]] = None, 23 | ): 24 | """Initialize MetricDefinitionsExplore.""" 25 | super().__init__(name, views, views_path) 26 | 27 | @staticmethod 28 | def from_views(views: List[View]) -> Iterator[Explore]: 29 | """Generate an Operational Monitoring explore for this namespace.""" 30 | for view in views: 31 | if view.view_type == "metric_definitions_view": 32 | yield MetricDefinitionsExplore("metric_definitions", {}) 33 | 34 | @staticmethod 35 | def from_dict(name: str, defn: dict, views_path: Path) -> MetricDefinitionsExplore: 36 | """Get an instance of this explore from a dictionary definition.""" 37 | return MetricDefinitionsExplore(name, defn["views"], views_path, defn) 38 | 39 | def _to_lookml( 40 | self, 41 | _v1_name: Optional[str], 42 | ) -> List[Dict[str, Any]]: 43 | exposed_fields = ["ALL_FIELDS*"] 44 | 45 | explore_lookml: Dict[str, Any] = { 46 | "name": self.name, 47 | "always_filter": { 48 | "filters": [{"submission_date": "7 days"}, {"sampling": "1"}] 49 | }, 50 | # The base view is the only view that exposes the date and client_id fields. 51 | # All other views only expose the metric definitions. 
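# (Annotation: "ALL_FIELDS*" is LookML's built-in set that expands to every
# field of the explore and its joined views, so the generated explore
# exposes all fields by default.)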
52 | "fields": exposed_fields, 53 | } 54 | 55 | if datagroup := self.get_datagroup(): 56 | explore_lookml["persist_with"] = datagroup 57 | 58 | return [explore_lookml] 59 | 60 | def get_view_time_partitioning_group(self, view: str) -> Optional[str]: 61 | """Override time partitioning.""" 62 | return None 63 | -------------------------------------------------------------------------------- /generator/explores/operational_monitoring_explore.py: -------------------------------------------------------------------------------- 1 | """Operational Monitoring Explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import View 9 | from . import Explore 10 | 11 | 12 | class OperationalMonitoringExplore(Explore): 13 | """An Operational Monitoring Explore.""" 14 | 15 | type: str = "operational_monitoring_explore" 16 | 17 | def __init__( 18 | self, 19 | name: str, 20 | views: Dict[str, str], 21 | views_path: Optional[Path] = None, 22 | defn: Optional[Dict[str, Any]] = None, 23 | ): 24 | """Initialize OperationalMonitoringExplore.""" 25 | super().__init__(name, views, views_path) 26 | if defn is not None: 27 | self.branches = ", ".join(defn["branches"]) 28 | self.xaxis = defn.get("xaxis") 29 | self.dimensions = defn.get("dimensions", {}) 30 | self.summaries = defn.get("summaries", []) 31 | 32 | @staticmethod 33 | def from_views(views: List[View]) -> Iterator[Explore]: 34 | """Generate an Operational Monitoring explore for this namespace.""" 35 | for view in views: 36 | if view.view_type == "operational_monitoring_view": 37 | yield OperationalMonitoringExplore( 38 | "operational_monitoring", 39 | {"base_view": view.name}, 40 | ) 41 | 42 | @staticmethod 43 | def from_dict( 44 | name: str, defn: dict, views_path: Path 45 | ) -> OperationalMonitoringExplore: 46 | """Get an instance of this explore from a dictionary definition.""" 47 | return OperationalMonitoringExplore(name, defn["views"], views_path, defn) 48 | 49 | def _to_lookml( 50 | self, 51 | v1_name: Optional[str], 52 | ) -> List[Dict[str, Any]]: 53 | base_view_name = self.views["base_view"] 54 | 55 | filters = [ 56 | {f"{base_view_name}.branch": self.branches}, 57 | ] 58 | for dimension, info in self.dimensions.items(): 59 | if "default" in info: 60 | filters.append({f"{base_view_name}.{dimension}": info["default"]}) 61 | 62 | explore_lookml = { 63 | "name": self.views["base_view"], 64 | "always_filter": { 65 | "filters": [ 66 | {"branch": self.branches}, 67 | ] 68 | }, 69 | "hidden": "yes", 70 | } 71 | 72 | if datagroup := self.get_datagroup(): 73 | explore_lookml["persist_with"] = datagroup 74 | 75 | defn: List[Dict[str, Any]] = [explore_lookml] 76 | 77 | return defn 78 | 79 | 80 | class OperationalMonitoringAlertingExplore(Explore): 81 | """An Operational Monitoring Alerting Explore.""" 82 | 83 | type: str = "operational_monitoring_alerting_explore" 84 | 85 | def __init__( 86 | self, 87 | name: str, 88 | views: Dict[str, str], 89 | views_path: Optional[Path] = None, 90 | defn: Optional[Dict[str, Any]] = None, 91 | ): 92 | """Initialize OperationalMonitoringExplore.""" 93 | super().__init__(name, views, views_path) 94 | 95 | @staticmethod 96 | def from_views(views: List[View]) -> Iterator[Explore]: 97 | """Generate an Operational Monitoring explore for this namespace.""" 98 | for view in views: 99 | if view.view_type in { 100 | "operational_monitoring_alerting_view", 101 | }: 102 | yield OperationalMonitoringAlertingExplore( 103 | 
"operational_monitoring", 104 | {"base_view": view.name}, 105 | ) 106 | 107 | @staticmethod 108 | def from_dict( 109 | name: str, defn: dict, views_path: Path 110 | ) -> OperationalMonitoringAlertingExplore: 111 | """Get an instance of this explore from a dictionary definition.""" 112 | return OperationalMonitoringAlertingExplore( 113 | name, defn["views"], views_path, defn 114 | ) 115 | 116 | def _to_lookml( 117 | self, 118 | v1_name: Optional[str], 119 | ) -> List[Dict[str, Any]]: 120 | explore_lookml = {"name": self.views["base_view"], "hidden": "yes"} 121 | 122 | if datagroup := self.get_datagroup(): 123 | explore_lookml["persist_with"] = datagroup 124 | 125 | defn: List[Dict[str, Any]] = [explore_lookml] 126 | 127 | return defn 128 | -------------------------------------------------------------------------------- /generator/explores/ping_explore.py: -------------------------------------------------------------------------------- 1 | """Ping explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import PingView, View 9 | from . import Explore 10 | 11 | 12 | class PingExplore(Explore): 13 | """A Ping Table explore.""" 14 | 15 | type: str = "ping_explore" 16 | 17 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 18 | """Generate LookML to represent this explore.""" 19 | explore_lookml = { 20 | "name": self.name, 21 | "view_name": self.views["base_view"], 22 | "always_filter": { 23 | "filters": self.get_required_filters("base_view"), 24 | }, 25 | "joins": self.get_unnested_fields_joins_lookml(), 26 | } 27 | 28 | if datagroup := self.get_datagroup(): 29 | explore_lookml["persist_with"] = datagroup 30 | 31 | return [explore_lookml] 32 | 33 | @staticmethod 34 | def from_views(views: List[View]) -> Iterator[PingExplore]: 35 | """Generate all possible PingExplores from the views.""" 36 | for view in views: 37 | if view.view_type == PingView.type: 38 | yield PingExplore(view.name, {"base_view": view.name}) 39 | 40 | @staticmethod 41 | def from_dict(name: str, defn: dict, views_path: Path) -> PingExplore: 42 | """Get an instance of this explore from a name and dictionary definition.""" 43 | return PingExplore(name, defn["views"], views_path) 44 | -------------------------------------------------------------------------------- /generator/explores/table_explore.py: -------------------------------------------------------------------------------- 1 | """Table explore type.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from ..views import TableView, View 9 | from . 
import Explore 10 | 11 | ALLOWED_VIEWS = {"events_stream_table"} 12 | 13 | 14 | class TableExplore(Explore): 15 | """A table explore.""" 16 | 17 | type: str = "table_explore" 18 | 19 | def _to_lookml(self, v1_name: Optional[str]) -> List[Dict[str, Any]]: 20 | """Generate LookML to represent this explore.""" 21 | explore_lookml: Dict[str, Any] = { 22 | "name": self.name, 23 | "view_name": self.views["base_view"], 24 | "joins": self.get_unnested_fields_joins_lookml(), 25 | } 26 | if required_filters := self.get_required_filters("base_view"): 27 | explore_lookml["always_filter"] = { 28 | "filters": required_filters, 29 | } 30 | 31 | if datagroup := self.get_datagroup(): 32 | explore_lookml["persist_with"] = datagroup 33 | 34 | return [explore_lookml] 35 | 36 | @staticmethod 37 | def from_views(views: List[View]) -> Iterator[TableExplore]: 38 | """Don't generate all possible TableExplores from the views, only generate for ALLOWED_VIEWS.""" 39 | for view in views: 40 | if view.view_type == TableView.type: 41 | if view.name in ALLOWED_VIEWS: 42 | yield TableExplore(view.name, {"base_view": view.name}) 43 | 44 | @staticmethod 45 | def from_dict(name: str, defn: dict, views_path: Path) -> TableExplore: 46 | """Get an instance of this explore from a name and dictionary definition.""" 47 | return TableExplore(name, defn["views"], views_path) 48 | -------------------------------------------------------------------------------- /generator/lkml_update.py: -------------------------------------------------------------------------------- 1 | """An updated lkml parser to handle explore queries.""" 2 | 3 | from typing import List, Union 4 | 5 | from lkml.keys import KEYS_WITH_NAME_FIELDS 6 | from lkml.simple import DictParser 7 | from lkml.tree import BlockNode, DocumentNode, ListNode, PairNode 8 | 9 | 10 | def dump(obj: dict) -> str: 11 | """Dump an object as LookML.""" 12 | parser = UpdatedDictParser() 13 | tree: DocumentNode = parser.parse(obj) 14 | return str(tree) 15 | 16 | 17 | class UpdatedDictParser(DictParser): 18 | """An updated DictParser that properly handles queries.""" 19 | 20 | def parse_any( 21 | self, key: str, value: Union[str, list, tuple, dict] 22 | ) -> Union[ 23 | List[Union[BlockNode, ListNode, PairNode]], BlockNode, ListNode, PairNode 24 | ]: 25 | """Dynamically serializes a Python object based on its type. 26 | 27 | Args: 28 | key: A LookML field type (e.g. 
"suggestions" or "hidden") 29 | value: A string, tuple, or list to serialize 30 | Raises: 31 | TypeError: If input value is not of a valid type 32 | Returns: 33 | A generator of serialized string chunks 34 | """ 35 | if isinstance(value, str): 36 | return self.parse_pair(key, value) 37 | elif isinstance(value, (list, tuple)): 38 | if self.is_plural_key(key) and not self.parent_key == "query": 39 | # See https://github.com/joshtemple/lkml/issues/53 40 | # We check that the parent is not a query to ensure the 41 | # query fields don't get unnested 42 | return self.expand_list(key, value) 43 | else: 44 | return self.parse_list(key, value) 45 | elif isinstance(value, dict): 46 | if key in KEYS_WITH_NAME_FIELDS or "name" not in value.keys(): 47 | name = None 48 | else: 49 | name = value.pop("name") 50 | return self.parse_block(key, value, name) 51 | else: 52 | raise TypeError("Value must be a string, list, tuple, or dict.") 53 | -------------------------------------------------------------------------------- /generator/metrics_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for working with metric-hub.""" 2 | 3 | from typing import List, Optional 4 | 5 | from metric_config_parser.config import ConfigCollection 6 | from metric_config_parser.metric import MetricDefinition 7 | 8 | METRIC_HUB_REPO = "https://github.com/mozilla/metric-hub" 9 | LOOKER_METRIC_HUB_REPO = "https://github.com/mozilla/metric-hub/tree/main/looker" 10 | 11 | 12 | class _MetricsConfigLoader: 13 | """Loads metric config files from an external repository.""" 14 | 15 | config_collection: Optional[ConfigCollection] = None 16 | repos: List[str] = [METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO] 17 | 18 | @property 19 | def configs(self) -> ConfigCollection: 20 | configs = getattr(self, "_configs", None) 21 | if configs: 22 | return configs 23 | 24 | if self.config_collection is None: 25 | self.config_collection = ConfigCollection.from_github_repos(self.repos) 26 | self._configs = self.config_collection 27 | return self._configs 28 | 29 | def update_repos(self, repos: List[str]): 30 | """Change the repos to load configs from.""" 31 | self.repos = repos 32 | self.config_collection = None 33 | 34 | def metrics_of_data_source( 35 | self, data_source: str, namespace: str 36 | ) -> List[MetricDefinition]: 37 | """Get the metric definitions that use a specific data source.""" 38 | metrics = [] 39 | for definition in self.configs.definitions: 40 | if definition.platform == namespace: 41 | for _, metric_definition in definition.spec.metrics.definitions.items(): 42 | if ( 43 | metric_definition.data_source 44 | and metric_definition.data_source.name == data_source 45 | ): 46 | metrics.append(metric_definition) 47 | 48 | return metrics 49 | 50 | def data_sources_of_namespace(self, namespace: str) -> List[str]: 51 | """ 52 | Get the data source slugs in the specified namespace. 53 | 54 | Filter out data sources that are unused. 
55 | """ 56 | data_sources = [] 57 | for definition in self.configs.definitions: 58 | for data_source_slug in definition.spec.data_sources.definitions.keys(): 59 | if ( 60 | definition.platform == namespace 61 | and len( 62 | MetricsConfigLoader.metrics_of_data_source( 63 | data_source_slug, definition.platform 64 | ) 65 | ) 66 | > 0 67 | ): 68 | data_sources.append(data_source_slug) 69 | 70 | return data_sources 71 | 72 | 73 | MetricsConfigLoader = _MetricsConfigLoader() 74 | -------------------------------------------------------------------------------- /generator/operational_monitoring_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for operational monitoring.""" 2 | 3 | from multiprocessing.pool import ThreadPool 4 | from typing import Any, Dict, List, Optional, Tuple 5 | 6 | from google.api_core import exceptions 7 | from google.cloud import bigquery 8 | 9 | from .views import lookml_utils 10 | 11 | 12 | def _default_helper( 13 | bq_client: bigquery.Client, table: str, dimension: str 14 | ) -> Tuple[Optional[str], dict]: 15 | query_job = bq_client.query( 16 | f""" 17 | SELECT DISTINCT {dimension} AS option, COUNT(*) 18 | FROM {table} 19 | WHERE {dimension} IS NOT NULL 20 | GROUP BY 1 21 | ORDER BY 2 DESC 22 | LIMIT 10 23 | """ 24 | ) 25 | 26 | dimension_options = list(query_job.result()) 27 | 28 | if len(dimension_options) > 0: 29 | return dimension, { 30 | "default": dimension_options[0]["option"], 31 | "options": [d["option"] for d in dimension_options], 32 | } 33 | return None, {} 34 | 35 | 36 | def get_dimension_defaults( 37 | bq_client: bigquery.Client, table: str, dimensions: List[str] 38 | ) -> Dict[str, Any]: 39 | """ 40 | Find default values for certain dimensions. 41 | 42 | For a given Operational Monitoring dimension, find its default (most common) 43 | value and its top 10 most common to be used as dropdown options. 44 | """ 45 | with ThreadPool(4) as pool: 46 | return { 47 | key: value 48 | for key, value in pool.starmap( 49 | _default_helper, 50 | [[bq_client, table, dimension] for dimension in dimensions], 51 | ) 52 | if key is not None 53 | } 54 | 55 | 56 | def get_xaxis_val(table: str, dryrun) -> str: 57 | """ 58 | Return whether the x-axis should be build_id or submission_date. 59 | 60 | This is based on which one is found in the table provided. 
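    Illustrative example: a table whose generated dimensions include one
    named "build_id" yields "build_id"; any other table yields
    "submission_date".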
61 | """ 62 | all_dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) 63 | return ( 64 | "build_id" 65 | if "build_id" in {dimension["name"] for dimension in all_dimensions} 66 | else "submission_date" 67 | ) 68 | 69 | 70 | def get_active_projects( 71 | bq_client: bigquery.Client, project_table: str 72 | ) -> List[Dict[str, Any]]: 73 | """Select all operational monitoring projects.""" 74 | try: 75 | query_job = bq_client.query( 76 | f""" 77 | SELECT * 78 | FROM `{project_table}` 79 | WHERE 80 | end_date > CURRENT_DATE() OR 81 | end_date IS NULL 82 | """ 83 | ) 84 | 85 | projects = [dict(row) for row in query_job.result()] 86 | except exceptions.Forbidden: 87 | projects = [] 88 | return projects 89 | -------------------------------------------------------------------------------- /generator/spoke.py: -------------------------------------------------------------------------------- 1 | """Generate directories and models for new namespaces.""" 2 | 3 | import logging 4 | import os 5 | import shutil 6 | from collections import defaultdict 7 | from pathlib import Path 8 | from typing import Dict, List, TypedDict 9 | 10 | import click 11 | import lkml 12 | import looker_sdk 13 | import yaml 14 | 15 | from .lookml import ViewDict 16 | 17 | MODEL_SETS_BY_INSTANCE: Dict[str, List[str]] = { 18 | "https://mozilladev.cloud.looker.com": ["mozilla_confidential"], 19 | "https://mozillastaging.cloud.looker.com": ["mozilla_confidential"], 20 | "https://mozilla.cloud.looker.com": ["mozilla_confidential"], 21 | } 22 | 23 | DEFAULT_DB_CONNECTION = "telemetry" 24 | 25 | 26 | class ExploreDict(TypedDict): 27 | """Represent an explore definition.""" 28 | 29 | type: str 30 | views: List[Dict[str, str]] 31 | 32 | 33 | class NamespaceDict(TypedDict): 34 | """Represent a Namespace definition.""" 35 | 36 | views: ViewDict 37 | explores: ExploreDict 38 | pretty_name: str 39 | glean_app: bool 40 | connection: str 41 | spoke: str 42 | 43 | 44 | def setup_env_with_looker_creds() -> bool: 45 | """ 46 | Set up env with looker credentials. 47 | 48 | Returns TRUE if the config is complete. 49 | """ 50 | client_id = os.environ.get("LOOKER_API_CLIENT_ID") 51 | client_secret = os.environ.get("LOOKER_API_CLIENT_SECRET") 52 | instance = os.environ.get("LOOKER_INSTANCE_URI") 53 | 54 | if client_id is None or client_secret is None or instance is None: 55 | return False 56 | 57 | os.environ["LOOKERSDK_BASE_URL"] = instance 58 | os.environ["LOOKERSDK_API_VERSION"] = "4.0" 59 | os.environ["LOOKERSDK_VERIFY_SSL"] = "true" 60 | os.environ["LOOKERSDK_TIMEOUT"] = "120" 61 | os.environ["LOOKERSDK_CLIENT_ID"] = client_id 62 | os.environ["LOOKERSDK_CLIENT_SECRET"] = client_secret 63 | 64 | return True 65 | 66 | 67 | def generate_model( 68 | spoke_path: Path, name: str, namespace_defn: NamespaceDict, db_connection: str 69 | ) -> Path: 70 | """ 71 | Generate a model file for a namespace. 72 | 73 | We want these to have a nice label and a unique name. 74 | We only import explores and dashboards, as we want those 75 | to auto-import upon generation. 76 | 77 | Views are not imported by default, since they should 78 | be added one-by-one if they are included in an explore. 
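    Sketch of a generated model file (the "fenix" namespace and "Fenix" label
    are hypothetical; the include footer is assembled in the code below):

        connection: "telemetry"
        label: "Fenix"

        # Include files from looker-hub or spoke-default below. For example:
        include: "//looker-hub/fenix/explores/*"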
79 | """ 80 | logging.info(f"Generating model {name}...") 81 | model_defn = { 82 | "connection": db_connection, 83 | "label": namespace_defn["pretty_name"], 84 | } 85 | 86 | # automatically import generated explores for new glean apps 87 | has_explores = len(namespace_defn.get("explores", {})) > 0 88 | 89 | path = spoke_path / name / f"{name}.model.lkml" 90 | # lkml.dump may return None, in which case write an empty file 91 | footer_text = f""" 92 | # Include files from looker-hub or spoke-default below. For example: 93 | {'' if has_explores else '# '}include: "//looker-hub/{name}/explores/*" 94 | # include: "//looker-hub/{name}/dashboards/*" 95 | # include: "views/*" 96 | # include: "explores/*" 97 | # include: "dashboards/*" 98 | """ 99 | model_text = lkml.dump(model_defn) 100 | if model_text is None: 101 | path.write_text("") 102 | else: 103 | path.write_text(model_text + footer_text) 104 | 105 | return path 106 | 107 | 108 | def configure_model( 109 | sdk: looker_sdk.methods40.Looker40SDK, 110 | model_name: str, 111 | db_connection: str, 112 | spoke_project: str, 113 | ): 114 | """Configure a Looker model by name.""" 115 | instance = os.environ["LOOKER_INSTANCE_URI"] 116 | logging.info(f"Configuring model {model_name}...") 117 | 118 | try: 119 | sdk.lookml_model(model_name) 120 | logging.info("Model is configured!") 121 | return 122 | except looker_sdk.error.SDKError: 123 | pass 124 | 125 | sdk.create_lookml_model( 126 | looker_sdk.models40.WriteLookmlModel( 127 | allowed_db_connection_names=[db_connection], 128 | name=model_name, 129 | project_name=spoke_project, 130 | ) 131 | ) 132 | 133 | for model_set_name in MODEL_SETS_BY_INSTANCE[instance]: 134 | model_sets = sdk.search_model_sets(name=model_set_name) 135 | if len(model_sets) != 1: 136 | raise click.ClickException("Error: Expected exactly one matching model set") 137 | 138 | model_set = model_sets[0] 139 | models, _id = model_set.models, model_set.id 140 | if models is None or _id is None: 141 | raise click.ClickException("Error: Missing models or name from model_set") 142 | 143 | sdk.update_model_set( 144 | _id, looker_sdk.models40.WriteModelSet(models=list(models) + [model_name]) 145 | ) 146 | 147 | 148 | def generate_directories( 149 | namespaces: Dict[str, NamespaceDict], base_dir: Path, sdk_setup=False 150 | ): 151 | """Generate directories and model for a namespace, if it doesn't exist.""" 152 | seen_spoke_namespaces = defaultdict(list) 153 | for namespace, defn in namespaces.items(): 154 | spoke = defn["spoke"] 155 | seen_spoke_namespaces[spoke].append(namespace) 156 | 157 | spoke_dir = base_dir / spoke 158 | spoke_dir.mkdir(parents=True, exist_ok=True) 159 | print(f"Writing {namespace} to {spoke_dir}") 160 | existing_dirs = {p.name for p in spoke_dir.iterdir()} 161 | 162 | if namespace in existing_dirs: 163 | continue 164 | 165 | (spoke_dir / namespace).mkdir() 166 | for dirname in ("views", "explores", "dashboards"): 167 | (spoke_dir / namespace / dirname).mkdir() 168 | (spoke_dir / namespace / dirname / ".gitkeep").touch() 169 | 170 | db_connection: str = defn.get("connection", DEFAULT_DB_CONNECTION) 171 | generate_model(spoke_dir, namespace, defn, db_connection) 172 | 173 | if sdk_setup: 174 | spoke_project = spoke.removeprefix("looker-") 175 | sdk = looker_sdk.init40() 176 | logging.info("Looker SDK 4.0 initialized successfully.") 177 | configure_model(sdk, namespace, db_connection, spoke_project) 178 | 179 | # remove directories for namespaces that got removed 180 | for spoke in seen_spoke_namespaces.keys(): 181 | spoke_dir 
= base_dir / spoke 182 | existing_dirs = {p.name for p in spoke_dir.iterdir()} 183 | 184 | for existing_dir in existing_dirs: 185 | # make sure the directory belongs to a namespace by checking if a model file exists 186 | if (spoke_dir / existing_dir / f"{existing_dir}.model.lkml").is_file(): 187 | if existing_dir not in seen_spoke_namespaces[spoke]: 188 | # namespace does not exist anymore, remove its directory 189 | print(f"Removing {existing_dir} from {spoke_dir}") 190 | shutil.rmtree(spoke_dir / existing_dir) 191 | 192 | 193 | @click.command(help=__doc__) 194 | @click.option( 195 | "--namespaces", 196 | default="namespaces.yaml", 197 | type=click.File(), 198 | help="Path to the namespaces.yaml file.", 199 | ) 200 | @click.option( 201 | "--spoke-dir", 202 | default=".", 203 | type=click.Path(file_okay=False, dir_okay=True, writable=True), 204 | help="Directory containing the Looker spoke.", 205 | ) 206 | def update_spoke(namespaces, spoke_dir): 207 | """Generate updates to spoke project.""" 208 | _namespaces = yaml.safe_load(namespaces) 209 | sdk_setup = setup_env_with_looker_creds() 210 | generate_directories(_namespaces, Path(spoke_dir), sdk_setup) 211 | -------------------------------------------------------------------------------- /generator/utils.py: -------------------------------------------------------------------------------- 1 | """Utils.""" 2 | 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | LOOKER_HUB_URL = "https://raw.githubusercontent.com/mozilla/looker-hub/main" 7 | 8 | 9 | def get_file_from_looker_hub(path: Path): 10 | """Download a specific lookml artifact from looker-hub.""" 11 | file = path.name 12 | artifact_type = path.parent.name 13 | namespace = path.parent.parent.name 14 | print(f"{LOOKER_HUB_URL}/{namespace}/{artifact_type}/{file}") 15 | with urllib.request.urlopen( 16 | f"{LOOKER_HUB_URL}/{namespace}/{artifact_type}/{file}" 17 | ) as response: 18 | lookml = response.read().decode(response.headers.get_content_charset()) 19 | path.parent.mkdir(parents=True, exist_ok=True) 20 | path.write_text(lookml) 21 | -------------------------------------------------------------------------------- /generator/views/__init__.py: -------------------------------------------------------------------------------- 1 | """All available Looker views.""" 2 | 3 | from .client_counts_view import ClientCountsView 4 | from .events_view import EventsView 5 | from .funnel_analysis_view import FunnelAnalysisView 6 | from .glean_ping_view import GleanPingView 7 | from .growth_accounting_view import GrowthAccountingView 8 | from .metric_definitions_view import MetricDefinitionsView 9 | from .operational_monitoring_alerting_view import OperationalMonitoringAlertingView 10 | from .operational_monitoring_view import OperationalMonitoringView 11 | from .ping_view import PingView 12 | from .table_view import TableView 13 | from .view import View, ViewDict # noqa: F401 14 | 15 | VIEW_TYPES = { 16 | ClientCountsView.type: ClientCountsView, 17 | EventsView.type: EventsView, 18 | FunnelAnalysisView.type: FunnelAnalysisView, 19 | OperationalMonitoringView.type: OperationalMonitoringView, 20 | OperationalMonitoringAlertingView.type: OperationalMonitoringAlertingView, 21 | MetricDefinitionsView.type: MetricDefinitionsView, 22 | GleanPingView.type: GleanPingView, 23 | PingView.type: PingView, 24 | GrowthAccountingView.type: GrowthAccountingView, 25 | TableView.type: TableView, 26 | } 27 | --------------------------------------------------------------------------------
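The VIEW_TYPES registry above is what keeps namespace definitions declarative: consumers can dispatch on a view definition's `type` field instead of hard-coding each View subclass. A minimal sketch of that dispatch, assuming a hypothetical namespace fragment (the "fenix" namespace name and table are illustrative, not taken from this listing):

from generator.views import VIEW_TYPES

# Hypothetical fragment of a parsed namespaces.yaml "views" section.
view_definitions = {
    "client_counts": {
        "type": "client_counts_view",
        "tables": [{"table": "mozdata.fenix.baseline_clients_daily"}],
    },
}

for name, defn in view_definitions.items():
    # Resolve the concrete View subclass from its declared type ...
    view_class = VIEW_TYPES[defn["type"]]
    # ... and build it from the dictionary definition.
    view = view_class.from_dict("fenix", name, defn)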
/generator/views/client_counts_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Client Counts View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from typing import Any, Dict, Iterator, List, Optional, Union 7 | 8 | from .view import View, ViewDict 9 | 10 | 11 | class ClientCountsView(View): 12 | """A view for Client Counting measures.""" 13 | 14 | type: str = "client_counts_view" 15 | 16 | default_dimension_groups: List[Dict[str, Union[str, List[str]]]] = [ 17 | { 18 | "name": "since_first_seen", 19 | "type": "duration", 20 | "description": "Amount of time that has passed since the client was first seen.", 21 | "sql_start": "CAST(${TABLE}.first_seen_date AS TIMESTAMP)", 22 | "sql_end": "CAST(${TABLE}.submission_date AS TIMESTAMP)", 23 | "intervals": ["day", "week", "month", "year"], 24 | } 25 | ] 26 | 27 | default_dimensions: List[Dict[str, str]] = [ 28 | { 29 | "name": "have_completed_period", 30 | "type": "yesno", 31 | "description": "Only for use with cohort analysis. " 32 | "Filter on true to remove the tail of incomplete data from cohorts. " 33 | "Indicates whether the clients in the cohort for this row have all had a chance to complete this interval. " 34 | "For example, new clients from yesterday have not all had a chance to send a ping for today.", 35 | "sql": """ 36 | DATE_ADD( 37 | {% if client_counts.first_seen_date._is_selected %} 38 | DATE_ADD(DATE(${client_counts.first_seen_date}), INTERVAL 1 DAY) 39 | {% elsif client_counts.first_seen_week._is_selected %} 40 | DATE_ADD(DATE(${client_counts.first_seen_week}), INTERVAL 1 WEEK) 41 | {% elsif client_counts.first_seen_month._is_selected %} 42 | DATE_ADD(PARSE_DATE('%Y-%m', ${client_counts.first_seen_month}), INTERVAL 1 MONTH) 43 | {% elsif client_counts.first_seen_year._is_selected %} 44 | DATE_ADD(DATE(${client_counts.first_seen_year}, 1, 1), INTERVAL 1 YEAR) 45 | {% endif %} 46 | , 47 | {% if client_counts.days_since_first_seen._is_selected %} 48 | INTERVAL CAST(${client_counts.days_since_first_seen} AS INT64) DAY 49 | {% elsif client_counts.weeks_since_first_seen._is_selected %} 50 | INTERVAL CAST(${client_counts.weeks_since_first_seen} AS INT64) WEEK 51 | {% elsif client_counts.months_since_first_seen._is_selected %} 52 | INTERVAL CAST(${client_counts.months_since_first_seen} AS INT64) MONTH 53 | {% elsif client_counts.years_since_first_seen._is_selected %} 54 | INTERVAL CAST(${client_counts.years_since_first_seen} AS INT64) YEAR 55 | {% endif %} 56 | ) < current_date 57 | """, 58 | } 59 | ] 60 | 61 | default_measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [ 62 | { 63 | "name": "client_count", 64 | "type": "number", 65 | "description": "The number of clients, " 66 | "determined by whether they sent a baseline ping on the day in question.", 67 | "sql": "COUNT(DISTINCT ${TABLE}.client_id)", 68 | } 69 | ] 70 | 71 | def __init__( 72 | self, 73 | namespace: str, 74 | tables: List[Dict[str, str]], 75 | name: str = "client_counts", 76 | ): 77 | """Get an instance of a ClientCountsView.""" 78 | super().__init__(namespace, name, ClientCountsView.type, tables) 79 | 80 | @classmethod 81 | def from_db_views( 82 | klass, 83 | namespace: str, 84 | is_glean: bool, 85 | channels: List[Dict[str, str]], 86 | db_views: dict, 87 | ) -> Iterator[ClientCountsView]: 88 | """Get Client Count Views from db views and app variants.""" 89 | # We can guarantee there will always be at least one channel, 90 | # because this comes from the associated
_get_glean_repos in 91 | # namespaces.py 92 | dataset = next( 93 | (channel for channel in channels if channel.get("channel") == "release"), 94 | channels[0], 95 | )["dataset"] 96 | 97 | for view_id, references in db_views[dataset].items(): 98 | if view_id == "baseline_clients_daily" or view_id == "clients_daily": 99 | yield ClientCountsView( 100 | namespace, [{"table": f"mozdata.{dataset}.{view_id}"}] 101 | ) 102 | 103 | @classmethod 104 | def from_dict( 105 | klass, namespace: str, name: str, _dict: ViewDict 106 | ) -> ClientCountsView: 107 | """Get a view from a name and dict definition.""" 108 | return ClientCountsView(namespace, _dict["tables"], name) 109 | 110 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 111 | """Generate LookML for this view.""" 112 | table = self.tables[0]["table"] 113 | 114 | base_view = "baseline_clients_daily_table" 115 | if table is not None: 116 | base_view = table.split(".")[-1] + "_table" 117 | 118 | view_defn: Dict[str, Any] = { 119 | "extends": [base_view], 120 | "name": self.name, 121 | } 122 | 123 | # add dimensions and dimension groups 124 | view_defn["dimensions"] = deepcopy(ClientCountsView.default_dimensions) 125 | view_defn["dimension_groups"] = deepcopy( 126 | ClientCountsView.default_dimension_groups 127 | ) 128 | 129 | # add measures 130 | view_defn["measures"] = self.get_measures() 131 | 132 | return { 133 | "includes": [base_view + ".view.lkml"], 134 | "views": [view_defn], 135 | } 136 | 137 | def get_measures(self) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 138 | """Generate measures for Client Counts.""" 139 | return deepcopy(ClientCountsView.default_measures) 140 | -------------------------------------------------------------------------------- /generator/views/datagroups.py: -------------------------------------------------------------------------------- 1 | """Generate datagroup lkml files for each namespace.""" 2 | 3 | import logging 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | from typing import Any, List, Optional 7 | 8 | import lkml 9 | 10 | from generator.dryrun import DryRunError, Errors 11 | from generator.namespaces import DEFAULT_GENERATED_SQL_URI 12 | from generator.utils import get_file_from_looker_hub 13 | from generator.views import View, lookml_utils 14 | from generator.views.lookml_utils import BQViewReferenceMap 15 | 16 | DEFAULT_MAX_CACHE_AGE = "24 hours" 17 | 18 | SQL_TRIGGER_TEMPLATE_SINGLE_TABLE = """ 19 | SELECT MAX(storage_last_modified_time) AS storage_last_modified_time 20 | FROM `{project_id}`.`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE 21 | WHERE {table} 22 | """ 23 | 24 | SQL_TRIGGER_TEMPLATE_ALL_TABLES = """ 25 | SELECT MAX(storage_last_modified_time) 26 | FROM ( 27 | {tables} 28 | ) 29 | """ 30 | 31 | # To map views to their underlying tables: 32 | DATASET_VIEW_MAP = lookml_utils.get_bigquery_view_reference_map( 33 | DEFAULT_GENERATED_SQL_URI 34 | ) 35 | 36 | FILE_HEADER = """# *Do not manually modify this file* 37 | 38 | # This file has been generated via https://github.com/mozilla/lookml-generator 39 | 40 | # Using a datagroup in an Explore: https://cloud.google.com/looker/docs/reference/param-explore-persist-with 41 | # Using a datagroup in a derived table: https://cloud.google.com/looker/docs/reference/param-view-datagroup-trigger 42 | 43 | """ 44 | 45 | 46 | @dataclass(frozen=True, eq=True) 47 | class Datagroup: 48 | """Represents a Datagroup.""" 49 | 50 | name: str 51 | label: str 52 | sql_trigger: str 53 | description: 
str 54 | max_cache_age: str = DEFAULT_MAX_CACHE_AGE 55 | 56 | def __str__(self) -> str: 57 | """Return the LookML string representation of a Datagroup.""" 58 | return lkml.dump({"datagroups": [self.__dict__]}) # type: ignore 59 | 60 | def __lt__(self, other) -> bool: 61 | """Make datagroups sortable.""" 62 | return self.name < other.name 63 | 64 | 65 | def _get_datagroup_from_bigquery_tables( 66 | project_id, tables, view: View 67 | ) -> Optional[Datagroup]: 68 | """Use template and default values to create a Datagroup from BQ tables.""" 69 | if len(tables) == 0: 70 | return None 71 | 72 | datagroup_tables = [] 73 | for table in tables: 74 | dataset_id = table[1] 75 | table_id = table[2] 76 | 77 | datagroup_tables.append( 78 | SQL_TRIGGER_TEMPLATE_SINGLE_TABLE.format( 79 | project_id=table[0], 80 | table=f"(table_schema = '{dataset_id}' AND table_name = '{table_id}')", 81 | ) 82 | ) 83 | 84 | # create a datagroup associated with a view which will be used for caching 85 | return Datagroup( 86 | name=f"{view.name}_last_updated", 87 | label=f"{view.name} Last Updated", 88 | description=f"Updates for {view.name} when referenced tables are modified.", 89 | sql_trigger=SQL_TRIGGER_TEMPLATE_ALL_TABLES.format( 90 | project_id=project_id, tables=" UNION ALL ".join(datagroup_tables) 91 | ), 92 | ) 93 | 94 | 95 | def _get_datagroup_from_bigquery_view( 96 | project_id, 97 | dataset_id, 98 | table_id, 99 | dataset_view_map: BQViewReferenceMap, 100 | view: View, 101 | ) -> Optional[Datagroup]: 102 | # Dataset view map only contains references for shared-prod views. 103 | full_table_id = f"{project_id}.{dataset_id}.{table_id}" 104 | 105 | view_references = _get_referenced_tables( 106 | project_id, dataset_id, table_id, dataset_view_map, [] 107 | ) 108 | 109 | if not view_references or len(view_references) == 0: 110 | # Some views might not reference a source table 111 | logging.debug(f"Unable to find a source for {full_table_id} in generated-sql.") 112 | return None 113 | 114 | return _get_datagroup_from_bigquery_tables(project_id, view_references, view) 115 | 116 | 117 | def _get_referenced_tables( 118 | project_id, 119 | dataset_id, 120 | table_id, 121 | dataset_view_map: BQViewReferenceMap, 122 | seen: List[List[str]], 123 | ) -> List[List[str]]: 124 | """ 125 | Return a list of all tables referenced by the provided view. 126 | 127 | Recursively resolve references of referenced views to get only table dependencies. 
128 | """ 129 | if [project_id, dataset_id, table_id] in seen: 130 | return [[project_id, dataset_id, table_id]] 131 | 132 | seen += [[project_id, dataset_id, table_id]] 133 | 134 | dataset_view_references = dataset_view_map.get(dataset_id) 135 | 136 | if dataset_view_references is None: 137 | return [[project_id, dataset_id, table_id]] 138 | 139 | view_references = dataset_view_references.get(table_id) 140 | if view_references is None: 141 | return [[project_id, dataset_id, table_id]] 142 | 143 | return [ 144 | ref 145 | for view_reference in view_references 146 | for ref in _get_referenced_tables( 147 | view_reference[0], 148 | view_reference[1], 149 | view_reference[2], 150 | dataset_view_map, 151 | seen.copy(), 152 | ) 153 | if view_reference not in seen 154 | ] 155 | 156 | 157 | def _generate_view_datagroup( 158 | view: View, 159 | dataset_view_map: BQViewReferenceMap, 160 | dryrun, 161 | ) -> Optional[Datagroup]: 162 | """Generate the Datagroup LookML for a Looker View.""" 163 | if len(view.tables) == 0: 164 | return None 165 | 166 | # Use the release channel table or the first available table (usually the only one): 167 | view_tables = next( 168 | (table for table in view.tables if table.get("channel") == "release"), 169 | view.tables[0], 170 | ) 171 | 172 | if "table" not in view_tables: 173 | return None 174 | 175 | view_table = view_tables["table"] 176 | 177 | [project, dataset, table] = view_table.split(".") 178 | table_metadata = dryrun.create( 179 | project=project, 180 | dataset=dataset, 181 | table=table, 182 | ).get_table_metadata() 183 | 184 | if "TABLE" == table_metadata.get("tableType"): 185 | datagroups = _get_datagroup_from_bigquery_tables( 186 | project, [[project, dataset, table]], view 187 | ) 188 | return datagroups 189 | elif "VIEW" == table_metadata.get("tableType"): 190 | datagroups = _get_datagroup_from_bigquery_view( 191 | project, dataset, table, dataset_view_map, view 192 | ) 193 | return datagroups 194 | 195 | return None 196 | 197 | 198 | def generate_datagroup( 199 | view: View, 200 | target_dir: Path, 201 | namespace: str, 202 | dryrun, 203 | ) -> Any: 204 | """Generate and write a datagroups.lkml file to the namespace folder.""" 205 | datagroups_folder_path = target_dir / namespace / "datagroups" 206 | 207 | datagroup = None 208 | try: 209 | datagroup = _generate_view_datagroup(view, DATASET_VIEW_MAP, dryrun) 210 | except DryRunError as e: 211 | if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function: 212 | path = datagroups_folder_path / f"{e.table_id}_last_updated.datagroup.lkml" 213 | print( 214 | f"Permission error dry running: {path}. Copy existing file from looker-hub." 
215 | ) 216 | try: 217 | get_file_from_looker_hub(path) 218 | except Exception as ex: 219 | print(f"Skipping datagroup generation for {path}: {ex}") 220 | else: 221 | raise 222 | 223 | datagroup_paths = [] 224 | if datagroup: 225 | datagroups_folder_path.mkdir(exist_ok=True) 226 | datagroup_lkml_path = ( 227 | datagroups_folder_path / f"{datagroup.name}.datagroup.lkml" 228 | ) 229 | datagroup_lkml_path.write_text(FILE_HEADER + str(datagroup)) 230 | datagroup_paths.append(datagroup_lkml_path) 231 | 232 | return datagroup_paths 233 | -------------------------------------------------------------------------------- /generator/views/events_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Events view.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from typing import Any, Dict, Iterator, List, Optional 7 | 8 | from . import lookml_utils 9 | from .view import View, ViewDict 10 | 11 | 12 | class EventsView(View): 13 | """A view for querying events data, with one row per event.""" 14 | 15 | type: str = "events_view" 16 | 17 | default_measures: List[Dict[str, str]] = [ 18 | { 19 | "name": "event_count", 20 | "type": "count", 21 | "description": ("The number of times the event(s) occurred."), 22 | }, 23 | ] 24 | 25 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 26 | """Get an instance of an EventsView.""" 27 | super().__init__(namespace, name, EventsView.type, tables) 28 | 29 | @classmethod 30 | def from_db_views( 31 | klass, 32 | namespace: str, 33 | is_glean: bool, 34 | channels: List[Dict[str, str]], 35 | db_views: dict, 36 | ) -> Iterator[EventsView]: 37 | """Get Events Views from db views and app variants.""" 38 | # We can guarantee there will always be at least one channel, 39 | # because this comes from the associated _get_glean_repos in 40 | # namespaces.py 41 | dataset = next( 42 | (channel for channel in channels if channel.get("channel") == "release"), 43 | channels[0], 44 | )["dataset"] 45 | 46 | for view_id, references in db_views[dataset].items(): 47 | if view_id == "events_unnested": 48 | yield EventsView( 49 | namespace, 50 | "events", 51 | [ 52 | { 53 | "events_table_view": "events_unnested_table", 54 | "base_table": f"mozdata.{dataset}.{view_id}", 55 | } 56 | ], 57 | ) 58 | 59 | @classmethod 60 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> EventsView: 61 | """Get a view from a name and dict definition.""" 62 | return EventsView(namespace, name, _dict["tables"]) 63 | 64 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 65 | """Generate LookML for this view.""" 66 | view_defn: Dict[str, Any] = { 67 | "extends": [self.tables[0]["events_table_view"]], 68 | "name": self.name, 69 | } 70 | 71 | # add measures 72 | dimensions = lookml_utils._generate_dimensions( 73 | self.tables[0]["base_table"], dryrun=dryrun 74 | ) 75 | view_defn["measures"] = self.get_measures(dimensions) 76 | 77 | # set event_id as primary key if it exists in the underlying table 78 | # this will allow one_to_many joins 79 | event_id_dimension = self.generate_event_id_dimension(dimensions) 80 | if event_id_dimension is not None: 81 | view_defn["dimensions"] = [event_id_dimension] 82 | 83 | return { 84 | "includes": [f"{self.tables[0]['events_table_view']}.view.lkml"], 85 | "views": [view_defn], 86 | } 87 | 88 | def get_measures(self, dimensions) -> List[Dict[str, str]]: 89 | """Generate measures for Events Views.""" 90 | measures = 
deepcopy(EventsView.default_measures) 91 | client_id_field = self.get_client_id(dimensions, "events") 92 | if client_id_field is not None: 93 | measures.append( 94 | { 95 | "name": "client_count", 96 | "type": "count_distinct", 97 | "sql": f"${{{client_id_field}}}", 98 | "description": ( 99 | "The number of clients that completed the event(s)." 100 | ), 101 | } 102 | ) 103 | 104 | return measures 105 | 106 | def generate_event_id_dimension( 107 | self, dimensions: list[dict] 108 | ) -> Optional[Dict[str, str]]: 109 | """Generate the event_id dimension to be used as a primary key for a one-to-many join.""" 110 | event_id = self.select_dimension("event_id", dimensions, "events") 111 | if event_id: 112 | return { 113 | "name": "event_id", 114 | "primary_key": "yes", 115 | } 116 | return None 117 | -------------------------------------------------------------------------------- /generator/views/growth_accounting_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Growth Accounting View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from copy import deepcopy 6 | from itertools import filterfalse 7 | from typing import Any, Dict, Iterator, List, Optional, Union 8 | 9 | from . import lookml_utils 10 | from .view import View, ViewDict 11 | 12 | 13 | class GrowthAccountingView(View): 14 | """A view for growth accounting measures.""" 15 | 16 | type: str = "growth_accounting_view" 17 | DEFAULT_IDENTIFIER_FIELD: str = "client_id" 18 | 19 | other_dimensions: List[Dict[str, str]] = [ 20 | { 21 | "name": "first", 22 | "sql": "${TABLE}.first", 23 | "type": "yesno", 24 | "hidden": "yes", 25 | } 26 | ] 27 | 28 | default_measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [ 29 | { 30 | "name": "overall_active_previous", 31 | "type": "count", 32 | "filters": [{"active_last_week": "yes"}], 33 | }, 34 | { 35 | "name": "overall_active_current", 36 | "type": "count", 37 | "filters": [{"active_this_week": "yes"}], 38 | }, 39 | { 40 | "name": "overall_resurrected", 41 | "type": "count", 42 | "filters": [ 43 | {"new_last_week": "no"}, 44 | {"new_this_week": "no"}, 45 | {"active_last_week": "no"}, 46 | {"active_this_week": "yes"}, 47 | ], 48 | }, 49 | { 50 | "name": "new_users", 51 | "type": "count", 52 | "filters": [{"new_this_week": "yes"}, {"active_this_week": "yes"}], 53 | }, 54 | { 55 | "name": "established_users_returning", 56 | "type": "count", 57 | "filters": [ 58 | {"new_last_week": "no"}, 59 | {"new_this_week": "no"}, 60 | {"active_last_week": "yes"}, 61 | {"active_this_week": "yes"}, 62 | ], 63 | }, 64 | { 65 | "name": "new_users_returning", 66 | "type": "count", 67 | "filters": [ 68 | {"new_last_week": "yes"}, 69 | {"active_last_week": "yes"}, 70 | {"active_this_week": "yes"}, 71 | ], 72 | }, 73 | { 74 | "name": "new_users_churned_count", 75 | "type": "count", 76 | "filters": [ 77 | {"new_last_week": "yes"}, 78 | {"active_last_week": "yes"}, 79 | {"active_this_week": "no"}, 80 | ], 81 | }, 82 | { 83 | "name": "established_users_churned_count", 84 | "type": "count", 85 | "filters": [ 86 | {"new_last_week": "no"}, 87 | {"new_this_week": "no"}, 88 | {"active_last_week": "yes"}, 89 | {"active_this_week": "no"}, 90 | ], 91 | }, 92 | { 93 | "name": "new_users_churned", 94 | "type": "number", 95 | "sql": "-1 * ${new_users_churned_count}", 96 | }, 97 | { 98 | "name": "established_users_churned", 99 | "type": "number", 100 | "sql": "-1 * ${established_users_churned_count}", 101 | }, 102 | { 103 | "name": "overall_churned", 104 | 
"type": "number", 105 | "sql": "${new_users_churned} + ${established_users_churned}", 106 | }, 107 | { 108 | "name": "overall_retention_rate", 109 | "type": "number", 110 | "sql": ( 111 | "SAFE_DIVIDE(" 112 | "(${established_users_returning} + ${new_users_returning})," 113 | "${overall_active_previous}" 114 | ")" 115 | ), 116 | }, 117 | { 118 | "name": "established_user_retention_rate", 119 | "type": "number", 120 | "sql": ( 121 | "SAFE_DIVIDE(" 122 | "${established_users_returning}," 123 | "(${established_users_returning} + ${established_users_churned_count})" 124 | ")" 125 | ), 126 | }, 127 | { 128 | "name": "new_user_retention_rate", 129 | "type": "number", 130 | "sql": ( 131 | "SAFE_DIVIDE(" 132 | "${new_users_returning}," 133 | "(${new_users_returning} + ${new_users_churned_count})" 134 | ")" 135 | ), 136 | }, 137 | { 138 | "name": "overall_churn_rate", 139 | "type": "number", 140 | "sql": ( 141 | "SAFE_DIVIDE(" 142 | "(${established_users_churned_count} + ${new_users_churned_count})," 143 | "${overall_active_previous}" 144 | ")" 145 | ), 146 | }, 147 | { 148 | "name": "fraction_of_active_resurrected", 149 | "type": "number", 150 | "sql": "SAFE_DIVIDE(${overall_resurrected}, ${overall_active_current})", 151 | }, 152 | { 153 | "name": "fraction_of_active_new", 154 | "type": "number", 155 | "sql": "SAFE_DIVIDE(${new_users}, ${overall_active_current})", 156 | }, 157 | { 158 | "name": "fraction_of_active_established_returning", 159 | "type": "number", 160 | "sql": ( 161 | "SAFE_DIVIDE(" 162 | "${established_users_returning}," 163 | "${overall_active_current}" 164 | ")" 165 | ), 166 | }, 167 | { 168 | "name": "fraction_of_active_new_returning", 169 | "type": "number", 170 | "sql": "SAFE_DIVIDE(${new_users_returning}, ${overall_active_current})", 171 | }, 172 | { 173 | "name": "quick_ratio", 174 | "type": "number", 175 | "sql": ( 176 | "SAFE_DIVIDE(" 177 | "${new_users} + ${overall_resurrected}," 178 | "${established_users_churned_count} + ${new_users_churned_count}" 179 | ")" 180 | ), 181 | }, 182 | ] 183 | 184 | def __init__( 185 | self, 186 | namespace: str, 187 | tables: List[Dict[str, str]], 188 | identifier_field: str = DEFAULT_IDENTIFIER_FIELD, 189 | ): 190 | """Get an instance of a GrowthAccountingView.""" 191 | self.identifier_field = identifier_field 192 | 193 | super().__init__( 194 | namespace, "growth_accounting", GrowthAccountingView.type, tables 195 | ) 196 | 197 | @classmethod 198 | def get_default_dimensions( 199 | klass, identifier_field: str = DEFAULT_IDENTIFIER_FIELD 200 | ) -> List[Dict[str, str]]: 201 | """Get dimensions to be added to GrowthAccountingView by default.""" 202 | return [ 203 | { 204 | "name": "active_this_week", 205 | "sql": "mozfun.bits28.active_in_range(days_seen_bits, -6, 7)", 206 | "type": "yesno", 207 | "hidden": "yes", 208 | }, 209 | { 210 | "name": "active_last_week", 211 | "sql": "mozfun.bits28.active_in_range(days_seen_bits, -13, 7)", 212 | "type": "yesno", 213 | "hidden": "yes", 214 | }, 215 | { 216 | "name": "new_this_week", 217 | "sql": "DATE_DIFF(${submission_date}, first_run_date, DAY) BETWEEN 0 AND 6", 218 | "type": "yesno", 219 | "hidden": "yes", 220 | }, 221 | { 222 | "name": "new_last_week", 223 | "sql": "DATE_DIFF(${submission_date}, first_run_date, DAY) BETWEEN 7 AND 13", 224 | "type": "yesno", 225 | "hidden": "yes", 226 | }, 227 | { 228 | "name": f"{identifier_field}_day", 229 | "sql": f"CONCAT(CAST(${{TABLE}}.submission_date AS STRING), ${{{identifier_field}}})", 230 | "type": "string", 231 | "hidden": "yes", 232 | "primary_key": 
"yes", 233 | }, 234 | ] 235 | 236 | @classmethod 237 | def from_db_views( 238 | klass, 239 | namespace: str, 240 | is_glean: bool, 241 | channels: List[Dict[str, str]], 242 | db_views: dict, 243 | identifier_field: str = DEFAULT_IDENTIFIER_FIELD, 244 | ) -> Iterator[GrowthAccountingView]: 245 | """Get Growth Accounting Views from db views and app variants.""" 246 | dataset = next( 247 | (channel for channel in channels if channel.get("channel") == "release"), 248 | channels[0], 249 | )["dataset"] 250 | 251 | for view_id, references in db_views[dataset].items(): 252 | if view_id == "baseline_clients_last_seen": 253 | yield GrowthAccountingView( 254 | namespace, 255 | [{"table": f"mozdata.{dataset}.{view_id}"}], 256 | identifier_field=identifier_field, 257 | ) 258 | 259 | @classmethod 260 | def from_dict( 261 | klass, namespace: str, name: str, _dict: ViewDict 262 | ) -> GrowthAccountingView: 263 | """Get a view from a name and dict definition.""" 264 | return GrowthAccountingView( 265 | namespace, 266 | _dict["tables"], 267 | identifier_field=str( 268 | _dict.get( 269 | "identifier_field", GrowthAccountingView.DEFAULT_IDENTIFIER_FIELD 270 | ) 271 | ), 272 | ) 273 | 274 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 275 | """Generate LookML for this view.""" 276 | view_defn: Dict[str, Any] = {"name": self.name} 277 | table = self.tables[0]["table"] 278 | 279 | # add dimensions and dimension groups 280 | dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) + deepcopy( 281 | GrowthAccountingView.get_default_dimensions( 282 | identifier_field=self.identifier_field 283 | ) 284 | ) 285 | 286 | view_defn["dimensions"] = list( 287 | filterfalse(lookml_utils._is_dimension_group, dimensions) 288 | ) 289 | view_defn["dimension_groups"] = list( 290 | filter(lookml_utils._is_dimension_group, dimensions) 291 | ) 292 | 293 | # add measures 294 | view_defn["measures"] = self.get_measures() 295 | 296 | # SQL Table Name 297 | view_defn["sql_table_name"] = f"`{table}`" 298 | 299 | return {"views": [view_defn]} 300 | 301 | def get_measures(self) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 302 | """Generate measures for the Growth Accounting Framework.""" 303 | return deepcopy(GrowthAccountingView.default_measures) 304 | -------------------------------------------------------------------------------- /generator/views/operational_monitoring_alerting_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Operational Monitoring Alert View.""" 2 | 3 | from typing import Any, Dict, Optional 4 | 5 | from . 
import lookml_utils 6 | from .operational_monitoring_view import OperationalMonitoringView 7 | 8 | 9 | class OperationalMonitoringAlertingView(OperationalMonitoringView): 10 | """A view on an alerting operational monitoring table.""" 11 | 12 | type: str = "operational_monitoring_alerting_view" 13 | 14 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 15 | """Get this view as LookML.""" 16 | if len(self.tables) == 0: 17 | raise Exception(f"Operational Monitoring view {self.name} has no tables") 18 | 19 | reference_table = self.tables[0]["table"] 20 | dimensions = [ 21 | d 22 | for d in lookml_utils._generate_dimensions(reference_table, dryrun=dryrun) 23 | if d["name"] != "submission" 24 | ] 25 | 26 | dimensions.append( 27 | { 28 | "name": "submission_date", 29 | "type": "date", 30 | "sql": "${TABLE}.submission_date", 31 | "datatype": "date", 32 | "convert_tz": "no", 33 | } 34 | ) 35 | 36 | dimensions.append( 37 | { 38 | "name": "build_id_date", 39 | "type": "date", 40 | "hidden": "yes", 41 | "sql": "PARSE_DATE('%Y%m%d', CAST(${TABLE}.build_id AS STRING))", 42 | "datatype": "date", 43 | "convert_tz": "no", 44 | } 45 | ) 46 | 47 | return { 48 | "views": [ 49 | { 50 | "name": self.name, 51 | "sql_table_name": f"`{reference_table}`", 52 | "dimensions": dimensions, 53 | "measures": [ 54 | {"name": "errors", "type": "number", "sql": "COUNT(*)"} 55 | ], 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /generator/views/operational_monitoring_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe an Operational Monitoring View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List, Optional, Union 6 | 7 | from . 
import lookml_utils 8 | from .ping_view import PingView 9 | from .view import ViewDict 10 | 11 | ALLOWED_DIMENSIONS = { 12 | "branch", 13 | "metric", 14 | "statistic", 15 | "parameter", 16 | } 17 | 18 | 19 | class OperationalMonitoringView(PingView): 20 | """A view on an operational monitoring table.""" 21 | 22 | type: str = "operational_monitoring_view" 23 | 24 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 25 | """Create instance of an OperationalMonitoringView.""" 26 | super().__init__(namespace, name, tables) 27 | xaxis = "build_id" 28 | if len(tables) > 0 and "xaxis" in tables[0]: 29 | xaxis = tables[0]["xaxis"] 30 | 31 | xaxis_to_sql_mapping = { 32 | "build_id": f"PARSE_DATE('%Y%m%d', CAST(${{TABLE}}.{xaxis} AS STRING))", 33 | "submission_date": f"${{TABLE}}.{xaxis}", 34 | } 35 | self.dimensions: List[Dict[str, str]] = [ 36 | { 37 | "name": xaxis, 38 | "type": "date", 39 | "sql": xaxis_to_sql_mapping[xaxis], 40 | "datatype": "date", 41 | "convert_tz": "no", 42 | } 43 | ] 44 | 45 | @classmethod 46 | def from_dict( 47 | klass, namespace: str, name: str, _dict: ViewDict 48 | ) -> OperationalMonitoringView: 49 | """Get an OperationalMonitoringView from a dict representation.""" 50 | return klass(namespace, name, _dict["tables"]) 51 | 52 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 53 | """Get this view as LookML.""" 54 | if len(self.tables) == 0: 55 | raise Exception(f"Operational Monitoring view {self.name} has no tables") 56 | 57 | reference_table = self.tables[0]["table"] 58 | all_dimensions = lookml_utils._generate_dimensions( 59 | reference_table, dryrun=dryrun 60 | ) 61 | 62 | filtered_dimensions = [ 63 | d 64 | for d in all_dimensions 65 | if d["name"] in ALLOWED_DIMENSIONS 66 | or d["name"] in self.tables[0].get("dimensions", {}).keys() 67 | ] 68 | self.dimensions.extend(filtered_dimensions) 69 | 70 | return { 71 | "views": [ 72 | { 73 | "name": self.name, 74 | "sql_table_name": reference_table, 75 | "dimensions": self.dimensions, 76 | "measures": self.get_measures( 77 | self.dimensions, reference_table, v1_name 78 | ), 79 | } 80 | ] 81 | } 82 | 83 | def get_measures( 84 | self, dimensions: List[dict], table: str, v1_name: Optional[str] 85 | ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 86 | """Get OpMon measures.""" 87 | return [ 88 | {"name": "point", "type": "sum", "sql": "${TABLE}.point"}, 89 | {"name": "upper", "type": "sum", "sql": "${TABLE}.upper"}, 90 | {"name": "lower", "type": "sum", "sql": "${TABLE}.lower"}, 91 | ] 92 | -------------------------------------------------------------------------------- /generator/views/ping_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Ping View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from collections import defaultdict 6 | from typing import Any, Dict, Iterator, List, Optional, Union 7 | 8 | from . 
import lookml_utils 9 | from .view import OMIT_VIEWS, View, ViewDict 10 | 11 | 12 | class PingView(View): 13 | """A view on a ping table.""" 14 | 15 | type: str = "ping_view" 16 | allow_glean: bool = False 17 | 18 | def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 19 | """Create instance of a PingView.""" 20 | super().__init__(namespace, name, self.__class__.type, tables) 21 | 22 | @classmethod 23 | def from_db_views( 24 | klass, 25 | namespace: str, 26 | is_glean: bool, 27 | channels: List[Dict[str, str]], 28 | db_views: dict, 29 | ) -> Iterator[PingView]: 30 | """Get Looker views for a namespace.""" 31 | if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean): 32 | return 33 | 34 | view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 35 | for channel in channels: 36 | dataset = channel["dataset"] 37 | 38 | for view_id, references in db_views[dataset].items(): 39 | if view_id in OMIT_VIEWS: 40 | continue 41 | 42 | table_id = f"mozdata.{dataset}.{view_id}" 43 | table: Dict[str, str] = {"table": table_id} 44 | if channel.get("channel") is not None: 45 | table["channel"] = channel["channel"] 46 | 47 | # Only include those that select from a single ping source table 48 | # or union together multiple ping source tables of the same name. 49 | reference_table_names = set(r[-1] for r in references) 50 | reference_dataset_names = set(r[-2] for r in references) 51 | if ( 52 | len(reference_table_names) != 1 53 | or channel["source_dataset"] not in reference_dataset_names 54 | ): 55 | continue 56 | 57 | view_tables[view_id][table_id] = table 58 | 59 | for view_id, tables_by_id in view_tables.items(): 60 | yield klass(namespace, view_id, list(tables_by_id.values())) 61 | 62 | @classmethod 63 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView: 64 | """Get a view from a name and dict definition.""" 65 | return klass(namespace, name, _dict["tables"]) 66 | 67 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 68 | """Generate LookML for this view.""" 69 | view_defn: Dict[str, Any] = {"name": self.name} 70 | 71 | # use schema for the table where channel=="release" or the first one 72 | table = next( 73 | (table for table in self.tables if table.get("channel") == "release"), 74 | self.tables[0], 75 | )["table"] 76 | 77 | dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun) 78 | 79 | # set document id field as a primary key for joins 80 | view_defn["dimensions"] = [ 81 | d if d["name"] != "document_id" else dict(**d, primary_key="yes") 82 | for d in dimensions 83 | if not lookml_utils._is_dimension_group(d) 84 | ] 85 | view_defn["dimension_groups"] = [ 86 | d for d in dimensions if lookml_utils._is_dimension_group(d) 87 | ] 88 | 89 | # add measures 90 | view_defn["measures"] = self.get_measures(dimensions, table, v1_name) 91 | 92 | [project, dataset, table_id] = table.split(".") 93 | table_schema = dryrun.create( 94 | project=project, 95 | dataset=dataset, 96 | table=table_id, 97 | ).get_table_schema() 98 | nested_views = lookml_utils._generate_nested_dimension_views( 99 | table_schema, self.name 100 | ) 101 | 102 | # Round-tripping through a dict to get an ordered deduped list. 
103 | suggestions = list( 104 | dict.fromkeys( 105 | _table["channel"] for _table in self.tables if "channel" in _table 106 | ) 107 | ) 108 | 109 | if len(suggestions) > 1: 110 | view_defn["filters"] = [ 111 | { 112 | "name": "channel", 113 | "type": "string", 114 | "description": "Filter by the app's channel", 115 | "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}", 116 | "default_value": suggestions[0], 117 | "suggestions": suggestions, 118 | } 119 | ] 120 | 121 | view_defn["sql_table_name"] = f"`{table}`" 122 | 123 | return {"views": [view_defn] + nested_views} 124 | 125 | def get_dimensions( 126 | self, table, v1_name: Optional[str], dryrun 127 | ) -> List[Dict[str, Any]]: 128 | """Get the set of dimensions for this view.""" 129 | # add dimensions and dimension groups 130 | return lookml_utils._generate_dimensions(table, dryrun=dryrun) 131 | 132 | def get_measures( 133 | self, dimensions: List[dict], table: str, v1_name: Optional[str] 134 | ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 135 | """Generate measures from a list of dimensions. 136 | 137 | Adds a distinct client count when a client id dimension is present, and a ping count when a document_id dimension is present. 138 | 139 | Raise ClickException if multiple client id dimensions match. 140 | """ 141 | # Iterate through each of the dimensions and accumulate any measures 142 | # that we want to include in the view. We pull out the client id first 143 | # since we'll use it to calculate per-measure client counts. 144 | measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [] 145 | 146 | client_id_field = self.get_client_id(dimensions, table) 147 | if client_id_field is not None: 148 | measures.append( 149 | { 150 | "name": "clients", 151 | "type": "count_distinct", 152 | "sql": f"${{{client_id_field}}}", 153 | } 154 | ) 155 | 156 | for dimension in dimensions: 157 | dimension_name = dimension["name"] 158 | if dimension_name == "document_id": 159 | measures += [{"name": "ping_count", "type": "count"}] 160 | 161 | return measures 162 | -------------------------------------------------------------------------------- /generator/views/table_view.py: -------------------------------------------------------------------------------- 1 | """Class to describe a Table View.""" 2 | 3 | from __future__ import annotations 4 | 5 | from collections import defaultdict 6 | from itertools import filterfalse 7 | from typing import Any, Dict, Iterator, List, Optional, Set 8 | 9 | from click import ClickException 10 | 11 | from . 
import lookml_utils 12 | from .view import OMIT_VIEWS, View, ViewDict 13 | 14 | 15 | class TableView(View): 16 | """A view on any table.""" 17 | 18 | type: str = "table_view" 19 | measures: Optional[Dict[str, Dict[str, Any]]] 20 | 21 | def __init__( 22 | self, 23 | namespace: str, 24 | name: str, 25 | tables: List[Dict[str, str]], 26 | measures: Optional[Dict[str, Dict[str, Any]]] = None, 27 | ): 28 | """Create instance of a TableView.""" 29 | super().__init__(namespace, name, TableView.type, tables) 30 | self.measures = measures 31 | 32 | @classmethod 33 | def from_db_views( 34 | klass, 35 | namespace: str, 36 | is_glean: bool, 37 | channels: List[Dict[str, str]], 38 | db_views: dict, 39 | ) -> Iterator[TableView]: 40 | """Get Looker views for a namespace.""" 41 | view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 42 | for channel in channels: 43 | dataset = channel["dataset"] 44 | 45 | for view_id, references in db_views[dataset].items(): 46 | if view_id in OMIT_VIEWS: 47 | continue 48 | 49 | table_id = f"mozdata.{dataset}.{view_id}" 50 | table: Dict[str, str] = {"table": table_id} 51 | if "channel" in channel: 52 | table["channel"] = channel["channel"] 53 | 54 | view_tables[view_id][table_id] = table 55 | 56 | for view_id, tables_by_id in view_tables.items(): 57 | yield TableView(namespace, f"{view_id}_table", list(tables_by_id.values())) 58 | 59 | @classmethod 60 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> TableView: 61 | """Get a view from a name and dict definition.""" 62 | return TableView(namespace, name, _dict["tables"], _dict.get("measures")) 63 | 64 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 65 | """Generate LookML for this view.""" 66 | view_defn: Dict[str, Any] = {"name": self.name} 67 | 68 | # use schema for the table where channel=="release" or the first one 69 | table = next( 70 | (table for table in self.tables if table.get("channel") == "release"), 71 | self.tables[0], 72 | )["table"] 73 | 74 | # add dimensions and dimension groups 75 | dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun) 76 | view_defn["dimensions"] = list( 77 | filterfalse(lookml_utils._is_dimension_group, dimensions) 78 | ) 79 | view_defn["dimension_groups"] = list( 80 | filter(lookml_utils._is_dimension_group, dimensions) 81 | ) 82 | 83 | # add tag "time_partitioning_field" 84 | time_partitioning_fields: Set[str] = set( 85 | # filter out falsy values 86 | filter( 87 | None, (table.get("time_partitioning_field") for table in self.tables) 88 | ) 89 | ) 90 | if len(time_partitioning_fields) > 1: 91 | raise ClickException(f"Multiple time_partitioning_fields for {self.name!r}") 92 | elif len(time_partitioning_fields) == 1: 93 | field_name = time_partitioning_fields.pop() 94 | sql = f"${{TABLE}}.{field_name}" 95 | for group_defn in view_defn["dimension_groups"]: 96 | if group_defn["sql"] == sql: 97 | if "tags" not in group_defn: 98 | group_defn["tags"] = [] 99 | group_defn["tags"].append("time_partitioning_field") 100 | break 101 | else: 102 | raise ClickException( 103 | f"time_partitioning_field {field_name!r} not found in {self.name!r}" 104 | ) 105 | 106 | [project, dataset, table_id] = table.split(".") 107 | table_schema = dryrun.create( 108 | project=project, 109 | dataset=dataset, 110 | table=table_id, 111 | ).get_table_schema() 112 | nested_views = lookml_utils._generate_nested_dimension_views( 113 | table_schema, self.name 114 | ) 115 | 116 | if self.measures: 117 | view_defn["measures"] = [ 118 | {"name": 
measure_name, **measure_parameters} 119 | for measure_name, measure_parameters in self.measures.items() 120 | ] 121 | 122 | # parameterize table name 123 | if len(self.tables) > 1: 124 | view_defn["parameters"] = [ 125 | { 126 | "name": "channel", 127 | "type": "unquoted", 128 | "default_value": table, 129 | "allowed_values": [ 130 | { 131 | "label": _table["channel"].title(), 132 | "value": _table["table"], 133 | } 134 | for _table in self.tables 135 | ], 136 | } 137 | ] 138 | view_defn["sql_table_name"] = "`{% parameter channel %}`" 139 | else: 140 | view_defn["sql_table_name"] = f"`{table}`" 141 | 142 | return {"views": [view_defn] + nested_views} 143 | -------------------------------------------------------------------------------- /generator/views/view.py: -------------------------------------------------------------------------------- 1 | """Generic class to describe Looker views.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, Iterator, List, Optional, Set, TypedDict 6 | 7 | from click import ClickException 8 | 9 | OMIT_VIEWS: Set[str] = set() 10 | 11 | 12 | # TODO: Once we upgrade to Python 3.11 mark just `measures` as non-required, not all keys. 13 | class ViewDict(TypedDict, total=False): 14 | """Represent a view definition.""" 15 | 16 | type: str 17 | tables: List[Dict[str, str]] 18 | measures: Dict[str, Dict[str, Any]] 19 | 20 | 21 | class View(object): 22 | """A generic Looker View.""" 23 | 24 | name: str 25 | view_type: str 26 | tables: List[Dict[str, Any]] 27 | namespace: str 28 | 29 | def __init__( 30 | self, 31 | namespace: str, 32 | name: str, 33 | view_type: str, 34 | tables: List[Dict[str, Any]], 35 | **kwargs, 36 | ): 37 | """Create an instance of a view.""" 38 | self.namespace = namespace 39 | self.tables = tables 40 | self.name = name 41 | self.view_type = view_type 42 | 43 | @classmethod 44 | def from_db_views( 45 | klass, 46 | namespace: str, 47 | is_glean: bool, 48 | channels: List[Dict[str, str]], 49 | db_views: dict, 50 | ) -> Iterator[View]: 51 | """Get Looker views from app.""" 52 | raise NotImplementedError("Only implemented in subclass.") 53 | 54 | @classmethod 55 | def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> View: 56 | """Get a view from a name and dict definition.""" 57 | raise NotImplementedError("Only implemented in subclass.") 58 | 59 | def get_type(self) -> str: 60 | """Get the type of this view.""" 61 | return self.view_type 62 | 63 | def as_dict(self) -> dict: 64 | """Get this view as a dictionary.""" 65 | return { 66 | "type": self.view_type, 67 | "tables": self.tables, 68 | } 69 | 70 | def __str__(self): 71 | """Stringify.""" 72 | return f"name: {self.name}, type: {self.type}, table: {self.tables}, namespace: {self.namespace}" 73 | 74 | def __eq__(self, other) -> bool: 75 | """Check for equality with other View.""" 76 | 77 | def comparable_dict(d): 78 | return {tuple(sorted([(k, str(v)) for k, v in t.items()])) for t in d} 79 | 80 | if isinstance(other, View): 81 | return ( 82 | self.name == other.name 83 | and self.view_type == other.view_type 84 | and comparable_dict(self.tables) == comparable_dict(other.tables) 85 | and self.namespace == other.namespace 86 | ) 87 | return False 88 | 89 | def get_dimensions( 90 | self, table, v1_name: Optional[str], dryrun 91 | ) -> List[Dict[str, Any]]: 92 | """Get the set of dimensions for this view.""" 93 | raise NotImplementedError("Only implemented in subclass.") 94 | 95 | def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 96 | 
""" 97 | Generate Lookml for this view. 98 | 99 | View instances can generate more than one Looker view, 100 | for e.g. nested fields and joins, so this returns 101 | a list. 102 | """ 103 | raise NotImplementedError("Only implemented in subclass.") 104 | 105 | def get_client_id(self, dimensions: List[dict], table: str) -> Optional[str]: 106 | """Return the first field that looks like a client identifier.""" 107 | client_id_fields = self.select_dimension( 108 | {"client_id", "client_info__client_id", "context_id"}, 109 | dimensions, 110 | table, 111 | ) 112 | # Some pings purposely disinclude client_ids, e.g. firefox installer 113 | return client_id_fields["name"] if client_id_fields else None 114 | 115 | def get_document_id(self, dimensions: List[dict], table: str) -> Optional[str]: 116 | """Return the first field that looks like a document_id.""" 117 | document_id = self.select_dimension("document_id", dimensions, table) 118 | return document_id["name"] if document_id else None 119 | 120 | def select_dimension( 121 | self, 122 | dimension_names: str | set[str], 123 | dimensions: List[dict], 124 | table: str, 125 | ) -> Optional[dict[str, str]]: 126 | """ 127 | Return the first field that matches dimension name. 128 | 129 | Throws if the query set is greater than one and more than one item is selected. 130 | """ 131 | if isinstance(dimension_names, str): 132 | dimension_names = {dimension_names} 133 | selected = [d for d in dimensions if d["name"] in dimension_names] 134 | if selected: 135 | # there should only be one dimension selected from the set 136 | # if there are multiple options in the dimention_names set. 137 | if len(dimension_names) > 1 and len(selected) > 1: 138 | raise ClickException( 139 | f"Duplicate {'/'.join(dimension_names)} dimension in {table!r}" 140 | ) 141 | return selected[0] 142 | return None 143 | -------------------------------------------------------------------------------- /namespaces-disallowlist.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - burnham 3 | - firefox_echo_show 4 | - firefox_fire_tv 5 | - firefox_reality 6 | - firefox_reality_pc 7 | - lockwise_android 8 | - lockwise_ios 9 | - mlhackweek_search 10 | - pine 11 | - pioneer_* 12 | - rally_* 13 | - reference_browser 14 | - tiktokreporter_* 15 | - org_mozilla_ios_tiktok_reporter* 16 | - org_mozilla_tiktokreporter 17 | - moso_* 18 | - regrets_reporter 19 | - mozphab 20 | - mozregression 21 | - pine 22 | - treeherder 23 | - sync 24 | - mozillavpn_backend_cirrus 25 | - glean_dictionary 26 | - mach 27 | - "*_cirrus" 28 | - bedrock 29 | - firefox_desktop_background_tasks 30 | - hubs 31 | - gleanjs_docs 32 | - fakespot 33 | - review_checker_desktop 34 | - review_checker 35 | - debug_ping_view 36 | - firefox_crashreporter 37 | - firefox_desktop_background_defaultagent 38 | - thunderbird_android 39 | - fenix: 40 | views: 41 | - installation 42 | - installation_table 43 | - topsites_impression 44 | - topsites_impression_table 45 | explores: 46 | - installation 47 | - topsites_impression 48 | - firefox_ios: 49 | views: 50 | - temp_bookmarks_sync 51 | - temp_bookmarks_sync_table 52 | - temp_clients_sync 53 | - temp_clients_sync_table 54 | - temp_credit_cards_sync 55 | - temp_credit_cards_sync_table 56 | - temp_history_sync 57 | - temp_history_sync_table 58 | - temp_logins_sync 59 | - temp_logins_sync_table 60 | - temp_rust_tabs_sync 61 | - temp_rust_tabs_sync_table 62 | - temp_sync 63 | - temp_sync_table 64 | - temp_tabs_sync 65 | - temp_tabs_sync_table 66 | - 
topsites_impression 67 | - topsites_impression_table 68 | explores: 69 | - temp_bookmarks_sync 70 | - temp_clients_sync 71 | - temp_credit_cards_sync 72 | - temp_history_sync 73 | - temp_logins_sync 74 | - temp_rust_tabs_sync 75 | - temp_sync 76 | - temp_tabs_sync 77 | - topsites_impression 78 | - "*": # exclude these pings/views/explores from all namespaces 79 | views: 80 | - addresses_sync 81 | - addresses_sync_table 82 | - adjust_attribution 83 | - adjust_attribution_table 84 | - activation 85 | - activation_table 86 | - baseline_clients_daily 87 | - baseline_clients_first_seen 88 | - baseline_clients_first_seen_table 89 | - bounce_tracking_protection 90 | - bounce_tracking_protection_table 91 | - captcha_detection 92 | - captcha_detection_table 93 | - client_deduplication 94 | - client_deduplication_table 95 | - context_id_deletion_request 96 | - context_id_deletion_request_table 97 | - cookie_banner_report_site 98 | - cookie_banner_report_site_table 99 | - dau_reporting 100 | - dau_reporting_table 101 | - event_names 102 | - feature_usage 103 | - feature_usage_table 104 | - fog_validation 105 | - fog_validation_table 106 | - font_list 107 | - font_list_table 108 | - hang_report 109 | - hang_report_table 110 | - heartbeat 111 | - heartbeat_table 112 | - home 113 | - home_table 114 | - logins_sync 115 | - logins_sync_table 116 | - new_metric_capture_emulation 117 | - new_metric_capture_emulation_table 118 | - nimbus 119 | - nimbus_table 120 | - pageload 121 | - pageload_table 122 | - pocket_button 123 | - pocket_button_table 124 | - startup_timeline 125 | - startup_timeline_table 126 | - tabs_sync 127 | - tabs_sync_table 128 | - usage_deletion_request 129 | - usage_deletion_request_table 130 | - usage_reporting_clients_* 131 | explores: 132 | - addresses_sync 133 | - activation 134 | - adjust_attribution 135 | - baseline_clients_daily 136 | - baseline_clients_first_seen 137 | - baseline_clients_last_seen 138 | - bounce_tracking_protection 139 | - captcha_detection 140 | - client_deduplication 141 | - context_id_deletion_request 142 | - cookie_banner_report_site 143 | - dau_reporting 144 | - event_names 145 | - feature_usage 146 | - fog_validation 147 | - font_list 148 | - hang_report 149 | - heartbeat 150 | - home 151 | - logins_sync 152 | - new_metric_capture_emulation 153 | - nimbus 154 | - pageload 155 | - pocket_button 156 | - startup_timeline 157 | - tabs_sync 158 | - usage_deletion_request 159 | - usage_reporting_clients_* 160 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --black 4 | --isort 5 | --mypy-ignore-missing-imports 6 | --pydocstyle 7 | --strict-markers 8 | filterwarnings = 9 | # upstream lib imports ABC improperly for backward compatibility 10 | ignore::DeprecationWarning:google.protobuf.descriptor 11 | ignore::DeprecationWarning:google.protobuf.internal.well_known_types 12 | # Silence: "Your application has authenticated using end user credentials from Google Cloud SDK" 13 | ignore::UserWarning:google.auth 14 | markers = 15 | integration: mark tests that check integration with external services. Skipped when not specifically enabled. 
16 | norecursedirs = 17 | venv 18 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | click==8.2.1 2 | flake8==7.1.1 3 | exceptiongroup # for pytest on python<=3.10 4 | google-cloud-bigquery==3.34.0 5 | google-cloud-storage==3.2.0 6 | Jinja2==3.1.6 7 | lkml==1.3.7 8 | looker-sdk==25.10.0 9 | mozilla-metric-config-parser==2025.7.1 10 | mozilla-nimbus-schemas==3001.0.0 11 | mozilla-schema-generator==0.5.1 12 | pandas==2.3.1 13 | pip-tools==7.4.1 14 | pre-commit==4.2.0 15 | pyarrow==20.0.0 16 | pytest-black==0.6.0 17 | pytest-isort==4.0.0 18 | pytest-mypy==1.0.1 19 | pytest-pydocstyle==2.4.0 20 | pytest==7.4.4 21 | PyYAML==6.0.2 22 | tomli==2.2.1 # for toml parsing on python<3.11 23 | types-PyYaml==6.0.12.20250516 24 | yamllint==1.37.1 25 | gitpython==3.1.44 26 | spectacles==2.4.12 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Installation for lookml-generator.""" 2 | 3 | # -*- coding: utf-8 -*- 4 | 5 | # This Source Code Form is subject to the terms of the Mozilla Public 6 | # License, v. 2.0. If a copy of the MPL was not distributed with this 7 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 | 9 | from setuptools import find_packages, setup 10 | 11 | readme = open("README.md").read() 12 | 13 | setup( 14 | name="lookml-generator", 15 | python_requires=">=3.10.0", 16 | version="0.0.0", 17 | description="Generates LookML to represent Mozilla data.", 18 | long_description=readme, 19 | long_description_content_type="text/markdown", 20 | author="Frank Bertsch", 21 | author_email="frank@mozilla.com", 22 | url="https://github.com/mozilla/lookml-generator", 23 | packages=find_packages(include=["generator", "generator.*"]), 24 | package_dir={"lookml-generator": "generator"}, 25 | entry_points={ 26 | "console_scripts": [ 27 | "lookml-generator=generator.__main__:main", 28 | ] 29 | }, 30 | include_package_data=True, 31 | package_data={"generator": ["*/templates/*.lkml"]}, 32 | zip_safe=False, 33 | keywords="lookml-generator", 34 | classifiers=[ 35 | "Intended Audience :: Developers", 36 | "Programming Language :: Python :: 3", 37 | "Programming Language :: Python :: 3.10", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests.""" 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """PyTest configuration.""" 2 | 3 | import json 4 | 5 | import pytest 6 | 7 | 8 | def pytest_collection_modifyitems(config, items): 9 | """Skip integration tests unless a keyword or marker filter is specified.""" 10 | keywordexpr = config.option.keyword 11 | markexpr = config.option.markexpr 12 | if keywordexpr or markexpr: 13 | return 14 | 15 | skip_integration = pytest.mark.skip(reason="integration marker not selected") 16 | 17 | for item in items: 18 | if "integration" in item.keywords: 19 | item.add_marker(skip_integration) 20 | 21 | 22 | @pytest.fixture 23 | def app_listings_uri(tmp_path): 24 | """ 25 | Mock app listings. 
26 | 27 | See: https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings 28 | """ 29 | dest = tmp_path / "app-listings" 30 | dest.write_bytes( 31 | json.dumps( 32 | [ 33 | { 34 | "app_name": "glean-app", 35 | "app_channel": "release", 36 | "canonical_app_name": "Glean App", 37 | "bq_dataset_family": "glean_app_release", 38 | "notification_emails": ["glean-app-owner@allizom.com"], 39 | "v1_name": "glean-app-release", 40 | }, 41 | { 42 | "app_name": "glean-app", 43 | "app_channel": "beta", 44 | "canonical_app_name": "Glean App Beta", 45 | "bq_dataset_family": "glean_app_beta", 46 | "notification_emails": ["glean-app-owner-beta@allizom.com"], 47 | "v1_name": "glean-app-beta", 48 | }, 49 | ] 50 | ).encode() 51 | ) 52 | return dest.absolute().as_uri() 53 | 54 | 55 | @pytest.fixture 56 | def metrics_listings_file(tmp_path): 57 | """Mock metrics listings.""" 58 | dest = tmp_path / "metrics-listings" 59 | dest.write_bytes( 60 | json.dumps( 61 | { 62 | "test.counter": { 63 | "type": "counter", 64 | }, 65 | "glean_validation_metrics.ping_count": { 66 | "type": "counter", 67 | }, 68 | } 69 | ).encode() 70 | ) 71 | return dest.absolute() 72 | 73 | 74 | @pytest.fixture 75 | def glean_apps(): 76 | """Mock processed version of app listings (see above).""" 77 | return [ 78 | { 79 | "name": "glean-app", 80 | "glean_app": True, 81 | "pretty_name": "Glean App", 82 | "owners": [ 83 | "glean-app-owner@allizom.com", 84 | ], 85 | "channels": [ 86 | { 87 | "channel": "release", 88 | "dataset": "glean_app", 89 | "source_dataset": "glean_app_release", 90 | }, 91 | { 92 | "channel": "beta", 93 | "dataset": "glean_app_beta", 94 | "source_dataset": "glean_app_beta_stable", 95 | }, 96 | ], 97 | "v1_name": "glean-app-release", 98 | } 99 | ] 100 | -------------------------------------------------------------------------------- /tests/data/metric-hub/definitions/fenix.toml: -------------------------------------------------------------------------------- 1 | [metrics] 2 | 3 | [metrics.uri_count] 4 | data_source = "baseline" 5 | select_expression = '{{agg_sum("metrics.counter.events_total_uri_count")}}' 6 | friendly_name = "URIs visited" 7 | description = "Counts the number of URIs each client visited" 8 | 9 | [metrics.active_hours] 10 | select_expression = "COALESCE(SUM(metrics.timespan.glean_baseline_duration.value), 0) / 3600.0" 11 | data_source = "baseline" 12 | friendly_name = "Active Hours" 13 | description = "Total time Firefox was active" 14 | 15 | 16 | [metrics.performance_pageload_dcl] 17 | data_source = "metrics" 18 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_dcl IGNORE NULLS)" 19 | friendly_name = "Pageload DCL" 20 | description = "Time in milliseconds from navigationStart to domContentLoaded for the foreground http or https root content document." 21 | category = "performance" 22 | type = "histogram" 23 | 24 | [metrics.performance_pageload_dcl_responsestart] 25 | data_source = "metrics" 26 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_dcl_responsestart IGNORE NULLS)" 27 | friendly_name = "Pageload DCL Response Start" 28 | description = "Time in milliseconds from responseStart to domContentLoaded for the foreground http or https root content document." 
29 | category = "performance" 30 | type = "histogram" 31 | 32 | [metrics.performance_pageload_fcp] 33 | data_source = "metrics" 34 | select_expression = "ARRAY_AGG(metrics.timing_distribution.performance_pageload_fcp IGNORE NULLS)" 35 | friendly_name = "Pageload FCP" 36 | description = "The time between navigationStart and the first contentful paint of a foreground http or https root content document, in milliseconds. The contentful paint timestamp is taken during display list building and does not include rasterization or compositing of that paint." 37 | category = "performance" 38 | type = "histogram" 39 | 40 | 41 | [data_sources.baseline] 42 | from_expression = """( 43 | SELECT 44 | p.*, 45 | DATE(p.submission_timestamp) AS submission_date 46 | FROM `moz-fx-data-shared-prod.{dataset}.baseline` p 47 | )""" 48 | client_id_column = "client_info.client_id" 49 | experiments_column_type = "glean" 50 | default_dataset = "org_mozilla_firefox" 51 | friendly_name = "Baseline" 52 | description = "Baseline Ping" 53 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 54 | 55 | [data_sources.baseline_v2] 56 | from_expression = """( 57 | SELECT 58 | p.*, 59 | DATE(p.submission_timestamp) AS submission_date 60 | FROM `moz-fx-data-shared-prod.{dataset}.baseline` p 61 | )""" 62 | client_id_column = "client_info.client_id" 63 | submission_date_column = "DATE(submission_timestamp)" 64 | experiments_column_type = "glean" 65 | default_dataset = "fenix" 66 | friendly_name = "Baseline" 67 | description = "Baseline Ping" 68 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 69 | 70 | [data_sources.events] 71 | from_expression = """( 72 | SELECT 73 | p.* EXCEPT (events), 74 | DATE(p.submission_timestamp) AS submission_date, 75 | event 76 | FROM 77 | `moz-fx-data-shared-prod.{dataset}.events` p 78 | CROSS JOIN 79 | UNNEST(p.events) AS event 80 | )""" 81 | client_id_column = "client_info.client_id" 82 | experiments_column_type = "glean" 83 | default_dataset = "org_mozilla_firefox" 84 | friendly_name = "Events" 85 | description = "Events Ping" 86 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 87 | 88 | [data_sources.metrics] 89 | from_expression = """( 90 | SELECT 91 | p.*, 92 | DATE(p.submission_timestamp) AS submission_date 93 | FROM `moz-fx-data-shared-prod.{dataset}.metrics` p 94 | )""" 95 | client_id_column = "client_info.client_id" 96 | experiments_column_type = "glean" 97 | default_dataset = "org_mozilla_firefox" 98 | friendly_name = "Metrics" 99 | description = "Metrics Ping" 100 | build_id_column = "REPLACE(CAST(DATE(mozfun.norm.fenix_build_to_datetime(client_info.app_build)) AS STRING), '-', '')" 101 | -------------------------------------------------------------------------------- /tests/test_events.py: -------------------------------------------------------------------------------- 1 | import lkml 2 | import pytest 3 | 4 | from generator.explores import EventsExplore 5 | from generator.views import EventsView 6 | 7 | from .utils import MockDryRun, MockDryRunContext, print_and_test 8 | 9 | 10 | class MockDryRunEvents(MockDryRun): 11 | """Mock dryrun.DryRun.""" 12 | 13 | def get_table_schema(self): 14 | """Mock dryrun.DryRun.get_table_schema""" 15 | 16 | return [ 17 | { 18 | "name": "client_info", 19 | "type": "RECORD", 20 | "fields": [{"name": "client_id", "type": "STRING"}], 21 | }, 22 | 
{"name": "event_id", "type": "STRING"}, 23 | ] 24 | 25 | 26 | @pytest.fixture() 27 | def events_view(): 28 | return EventsView( 29 | "glean_app", 30 | "events", 31 | [ 32 | { 33 | "events_table_view": "events_unnested_table", 34 | "base_table": "mozdata.glean_app.events_unnested", 35 | }, 36 | ], 37 | ) 38 | 39 | 40 | @pytest.fixture(params=["submission", "timestamp"]) 41 | def time_partitioning_group(request): 42 | return request.param 43 | 44 | 45 | @pytest.fixture() 46 | def events_explore(events_view, tmp_path, time_partitioning_group): 47 | (tmp_path / "events_unnested_table.view.lkml").write_text( 48 | lkml.dump( 49 | { 50 | "views": [ 51 | { 52 | "name": "events_unnested_table", 53 | "dimensions": [ 54 | { 55 | "name": "client_info__client_count", 56 | "type": "string", 57 | }, 58 | ], 59 | "dimension_groups": [ 60 | { 61 | "name": time_partitioning_group, 62 | "tags": ( 63 | ["time_partitioning_field"] 64 | if time_partitioning_group != "submission" 65 | else [] 66 | ), 67 | "type": "time", 68 | "timeframes": [ 69 | "raw", 70 | "time", 71 | "date", 72 | ], 73 | } 74 | ], 75 | } 76 | ] 77 | } 78 | ) 79 | ) 80 | (tmp_path / "events.view.lkml").write_text( 81 | lkml.dump( 82 | { 83 | "views": [ 84 | { 85 | "name": "events", 86 | "measures": [ 87 | { 88 | "name": "event_count", 89 | "type": "count", 90 | } 91 | ], 92 | } 93 | ] 94 | } 95 | ) 96 | ) 97 | return EventsExplore( 98 | "events", 99 | {"base_view": "events", "extended_view": "events_unnested_table"}, 100 | tmp_path, 101 | ) 102 | 103 | 104 | def test_view_from_db_views(events_view): 105 | db_views = { 106 | "glean_app": { 107 | "events": [["mozdata", "glean_app", "events"]], 108 | "events_unnested": [["mozdata", "glean_app", "events_unnested"]], 109 | } 110 | } 111 | 112 | channels = [ 113 | {"channel": "release", "dataset": "glean_app"}, 114 | {"channel": "beta", "dataset": "glean_app_beta"}, 115 | ] 116 | 117 | actual = next(EventsView.from_db_views("glean_app", True, channels, db_views)) 118 | 119 | assert actual == events_view 120 | 121 | 122 | def test_view_from_dict(events_view): 123 | actual = EventsView.from_dict( 124 | "glean_app", 125 | "events", 126 | { 127 | "type": "events_view", 128 | "tables": [ 129 | { 130 | "events_table_view": "events_unnested_table", 131 | "base_table": "mozdata.glean_app.events_unnested", 132 | } 133 | ], 134 | }, 135 | ) 136 | 137 | assert actual == events_view 138 | 139 | 140 | def test_explore_from_views(events_view, events_explore): 141 | views = [events_view] 142 | actual = next(EventsExplore.from_views(views)) 143 | 144 | assert actual == events_explore 145 | 146 | 147 | def test_explore_from_dict(events_explore, tmp_path): 148 | actual = EventsExplore.from_dict( 149 | "events", 150 | {"views": {"base_view": "events", "extended_view": "events_unnested_table"}}, 151 | tmp_path, 152 | ) 153 | assert actual == events_explore 154 | 155 | 156 | def test_view_lookml(events_view): 157 | expected = { 158 | "includes": ["events_unnested_table.view.lkml"], 159 | "views": [ 160 | { 161 | "name": "events", 162 | "extends": ["events_unnested_table"], 163 | "measures": [ 164 | { 165 | "name": "event_count", 166 | "description": ("The number of times the event(s) occurred."), 167 | "type": "count", 168 | }, 169 | { 170 | "name": "client_count", 171 | "description": ( 172 | "The number of clients that completed the event(s)." 
173 |                         ),
174 |                         "type": "count_distinct",
175 |                         "sql": "${client_info__client_id}",
176 |                     },
177 |                 ],
178 |                 "dimensions": [
179 |                     {
180 |                         "name": "event_id",
181 |                         "primary_key": "yes",
182 |                     },
183 |                 ],
184 |             },
185 |         ],
186 |     }
187 | 
188 |     mock_dryrun = MockDryRunContext(MockDryRunEvents, False)
189 | 
190 |     actual = events_view.to_lookml(None, dryrun=mock_dryrun)
191 |     print_and_test(expected=expected, actual=actual)
192 | 
193 | 
194 | def test_explore_lookml(time_partitioning_group, events_explore):
195 |     date_dimension = f"{time_partitioning_group}_date"
196 |     expected = [
197 |         {
198 |             "name": "event_counts",
199 |             "view_name": "events",
200 |             "description": "Event counts over time.",
201 |             "always_filter": {
202 |                 "filters": [
203 |                     {date_dimension: "28 days"},
204 |                 ]
205 |             },
206 |             "sql_always_where": f"${{events.{date_dimension}}} >= '2010-01-01'",
207 |             "queries": [
208 |                 {
209 |                     "description": "Event counts from all events over the past two weeks.",
210 |                     "dimensions": [date_dimension],
211 |                     "measures": ["event_count"],
212 |                     "filters": [
213 |                         {date_dimension: "14 days"},
214 |                     ],
215 |                     "name": "all_event_counts",
216 |                 },
217 |             ],
218 |             "joins": [],
219 |         },
220 |     ]
221 | 
222 |     actual = events_explore.to_lookml(None, None)
223 |     print_and_test(expected=expected, actual=actual)
224 | 
--------------------------------------------------------------------------------
/tests/test_glean_ping_view.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import Mock, patch
  2 | 
  3 | from mozilla_schema_generator.probes import GleanProbe
  4 | 
  5 | from generator.views import GleanPingView
  6 | 
  7 | from .utils import MockDryRun, MockDryRunContext
  8 | 
  9 | 
 10 | class MockDryRunPingView(MockDryRun):
 11 |     """Mock dryrun.DryRun."""
 12 | 
 13 |     def get_table_schema(self):
 14 |         """Mock dryrun.DryRun.get_table_schema"""
 15 |         table_id = f"{self.project}.{self.dataset}.{self.table}"
 16 | 
 17 |         if table_id == "mozdata.glean_app.dash_name":
 18 |             return [
 19 |                 {
 20 |                     "name": "metrics",
 21 |                     "type": "RECORD",
 22 |                     "fields": [
 23 |                         {
 24 |                             "name": "string",
 25 |                             "type": "RECORD",
 26 |                             "fields": [{"name": "fun_string_metric", "type": "STRING"}],
 27 |                         },
 28 |                         {
 29 |                             "name": "url2",
 30 |                             "type": "RECORD",
 31 |                             "fields": [{"name": "fun_url_metric", "type": "STRING"}],
 32 |                         },
 33 |                         {
 34 |                             "name": "datetime",
 35 |                             "type": "RECORD",
 36 |                             "fields": [
 37 |                                 {"name": "fun_datetime_metric", "type": "TIMESTAMP"}
 38 |                             ],
 39 |                         },
 40 |                         {
 41 |                             "name": "labeled_counter",
 42 |                             "type": "RECORD",
 43 |                             "fields": [
 44 |                                 {
 45 |                                     "name": "fun_counter_metric",
 46 |                                     "type": "STRING",
 47 |                                     "mode": "REPEATED",
 48 |                                     "fields": [
 49 |                                         {"name": "key", "type": "STRING"},
 50 |                                         {"name": "value", "type": "INT64"},
 51 |                                     ],
 52 |                                 }
 53 |                             ],
 54 |                         },
 55 |                     ],
 56 |                 }
 57 |             ]
 58 | 
 59 |         raise ValueError(f"Table not found: {table_id}")
 60 | 
 61 | 
 62 | @patch("generator.views.glean_ping_view.GleanPing")
 63 | def test_kebab_case(mock_glean_ping):
 64 |     """
 65 |     Tests that we handle metrics from kebab-case pings
 66 |     """
 67 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
 68 |     glean_app = Mock()
 69 |     glean_app.get_probes.return_value = [
 70 |         GleanProbe(
 71 |             "fun.string_metric",
 72 |             {
 73 |                 "type": "string",
 74 |                 "history": [
 75 |                     {
 76 |                         "send_in_pings": ["dash-name"],
 77 |                         "dates": {
 78 |                             "first": "2020-01-01 00:00:00",
 79 |                             "last": "2020-01-02 00:00:00",
 80 |                         },
 81 |                     }
 82 |                 ],
 83 |                 "name": "string_metric",
 84 |             },
 85 |         ),
 86 |     ]
 87 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
 88 |     mock_glean_ping.return_value = glean_app
 89 |     view = GleanPingView(
 90 |         "glean_app",
 91 |         "dash_name",
 92 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
 93 |     )
 94 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
 95 |     assert len(lookml["views"]) == 1
 96 |     assert len(lookml["views"][0]["dimensions"]) == 1
 97 |     assert (
 98 |         lookml["views"][0]["dimensions"][0]["name"]
 99 |         == "metrics__string__fun_string_metric"
100 |     )
101 | 
102 | 
103 | @patch("generator.views.glean_ping_view.GleanPing")
104 | def test_url_metric(mock_glean_ping):
105 |     """
106 |     Tests that we handle URL metrics
107 |     """
108 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
109 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
110 |     glean_app = Mock()
111 |     glean_app.get_probes.return_value = [
112 |         GleanProbe(
113 |             "fun.url_metric",
114 |             {
115 |                 "type": "url",
116 |                 "history": [
117 |                     {
118 |                         "send_in_pings": ["dash-name"],
119 |                         "dates": {
120 |                             "first": "2020-01-01 00:00:00",
121 |                             "last": "2020-01-02 00:00:00",
122 |                         },
123 |                     }
124 |                 ],
125 |                 "name": "url_metric",
126 |             },
127 |         ),
128 |     ]
129 |     mock_glean_ping.return_value = glean_app
130 |     view = GleanPingView(
131 |         "glean_app",
132 |         "dash_name",
133 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
134 |     )
135 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
136 |     assert len(lookml["views"]) == 1
137 |     assert len(lookml["views"][0]["dimensions"]) == 1
138 |     assert (
139 |         lookml["views"][0]["dimensions"][0]["name"] == "metrics__url2__fun_url_metric"
140 |     )
141 | 
142 | 
143 | @patch("generator.views.glean_ping_view.GleanPing")
144 | def test_datetime_metric(mock_glean_ping):
145 |     """
146 |     Tests that we handle datetime metrics
147 |     """
148 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
149 |     glean_app = Mock()
150 |     mock_dryrun = MockDryRunContext(MockDryRunPingView, False)
151 |     glean_app.get_probes.return_value = [
152 |         GleanProbe(
153 |             "fun.datetime_metric",
154 |             {
155 |                 "type": "datetime",
156 |                 "history": [
157 |                     {
158 |                         "send_in_pings": ["dash-name"],
159 |                         "dates": {
160 |                             "first": "2020-01-01 00:00:00",
161 |                             "last": "2020-01-02 00:00:00",
162 |                         },
163 |                     }
164 |                 ],
165 |                 "name": "datetime_metric",
166 |             },
167 |         ),
168 |     ]
169 |     mock_glean_ping.return_value = glean_app
170 |     view = GleanPingView(
171 |         "glean_app",
172 |         "dash_name",
173 |         [{"channel": "release", "table": "mozdata.glean_app.dash_name"}],
174 |     )
175 |     lookml = view.to_lookml("glean-app", dryrun=mock_dryrun)
176 |     assert len(lookml["views"]) == 1
177 |     assert len(lookml["views"][0]["dimension_groups"]) == 1
178 |     assert (
179 |         lookml["views"][0]["dimension_groups"][0]["name"]
180 |         == "metrics__datetime__fun_datetime_metric"
181 |     )
182 |     assert "timeframes" in lookml["views"][0]["dimension_groups"][0]
183 |     assert "group_label" not in lookml["views"][0]["dimension_groups"][0]
184 |     assert "group_item_label" not in lookml["views"][0]["dimension_groups"][0]
185 |     assert "links" not in lookml["views"][0]["dimension_groups"][0]
186 | 
187 | 
188 | @patch("generator.views.glean_ping_view.GleanPing")
189 | def test_undeployed_probe(mock_glean_ping):
190 |     """
191 |     Tests that we handle metrics not yet deployed to bigquery
192 |     """
193 |     mock_glean_ping.get_repos.return_value = [{"name": "glean-app"}]
194 |     glean_app = Mock()
195 |     glean_app.get_probes.return_value = [
196 |         GleanProbe(
197 |             f"fun.{name}",
198 |             {
199 |                 "type": "labeled_counter",
200 |                 "history": [
201 |                     {
"send_in_pings": ["dash-name"], 203 | "dates": { 204 | "first": "2020-01-01 00:00:00", 205 | "last": "2020-01-02 00:00:00", 206 | }, 207 | } 208 | ], 209 | "name": name, 210 | }, 211 | ) 212 | # "counter_metric2" represents a probe not present in the table schema 213 | for name in ["counter_metric", "counter_metric2"] 214 | ] 215 | mock_glean_ping.return_value = glean_app 216 | mock_dryrun = MockDryRunContext(MockDryRunPingView, False) 217 | view = GleanPingView( 218 | "glean_app", 219 | "dash_name", 220 | [{"channel": "release", "table": "mozdata.glean_app.dash_name"}], 221 | ) 222 | lookml = view.to_lookml("glean-app", dryrun=mock_dryrun) 223 | # In addition to the table view, each labeled counter adds a join view and a suggest 224 | # view. Expect 3 views, because 1 for the table view, 2 added for fun.counter_metric 225 | # because it's in the table schema, and 0 added for fun.counter_metric2 because it's 226 | # not in the table schema. 227 | assert len(lookml["views"]) == 2 228 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from google.cloud import bigquery 3 | 4 | 5 | @pytest.fixture 6 | def client(): 7 | return bigquery.Client() 8 | 9 | 10 | @pytest.mark.integration 11 | def test_google_connection(client): 12 | job = client.query("SELECT NULL") 13 | assert [(None,)] == [tuple(row) for row in job.result()] 14 | -------------------------------------------------------------------------------- /tests/test_lookml_utils.py: -------------------------------------------------------------------------------- 1 | from generator.views.lookml_utils import escape_filter_expr 2 | 3 | 4 | def test_escape_char(): 5 | expr = "a_b" 6 | assert escape_filter_expr(expr) == "a^_b" 7 | 8 | 9 | def test_escape_multi_char(): 10 | expr = 'a_b%c,d"f^g' 11 | assert escape_filter_expr(expr) == 'a^_b^%c^,d^"f^^g' 12 | 13 | 14 | def test_escape_leading_char(): 15 | expr = "-a-b" 16 | assert escape_filter_expr(expr) == "^-a-b" 17 | -------------------------------------------------------------------------------- /tests/test_spoke.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import Mock, patch 3 | 4 | import lkml 5 | import looker_sdk as _looker_sdk 6 | import pytest 7 | 8 | from generator.spoke import generate_directories 9 | 10 | from .utils import print_and_test 11 | 12 | 13 | @pytest.fixture() 14 | def namespaces() -> dict: 15 | return { 16 | "glean-app": { 17 | "pretty_name": "Glean App", 18 | "glean_app": True, 19 | "spoke": "looker-spoke-default", 20 | "views": { 21 | "baseline": { 22 | "type": "ping_view", 23 | "tables": [ 24 | { 25 | "channel": "release", 26 | "table": "mozdata.glean_app.baseline", 27 | } 28 | ], 29 | } 30 | }, 31 | "explores": { 32 | "baseline": {"type": "ping_explore", "views": {"base_view": "baseline"}} 33 | }, 34 | } 35 | } 36 | 37 | 38 | @pytest.fixture 39 | def custom_namespaces(): 40 | return { 41 | "custom": { 42 | "glean_app": False, 43 | "spoke": "looker-spoke-private", 44 | "connection": "bigquery-oauth", 45 | "owners": ["custom-owner@allizom.com", "custom-owner2@allizom.com"], 46 | "pretty_name": "Custom", 47 | "views": { 48 | "baseline": { 49 | "tables": [ 50 | {"channel": "release", "table": "mozdata.custom.baseline"} 51 | ], 52 | "type": "ping_view", 53 | } 54 | }, 55 | } 56 | } 57 | 58 | 59 | @patch("generator.spoke.looker_sdk") 60 | 
 60 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
 61 | def test_generate_directories(looker_sdk, namespaces, tmp_path):
 62 |     sdk = looker_sdk.init40()
 63 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
 64 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
 65 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
 66 | 
 67 |     generate_directories(namespaces, tmp_path, True)
 68 |     dirs = list((tmp_path / "looker-spoke-default").iterdir())
 69 |     assert dirs == [tmp_path / "looker-spoke-default" / "glean-app"]
 70 | 
 71 |     app_path = tmp_path / "looker-spoke-default" / "glean-app/"
 72 |     sub_dirs = set(app_path.iterdir())
 73 |     assert sub_dirs == {
 74 |         app_path / "views",
 75 |         app_path / "explores",
 76 |         app_path / "dashboards",
 77 |         app_path / "glean-app.model.lkml",
 78 |     }
 79 | 
 80 |     sdk.create_lookml_model.assert_called_once()
 81 |     sdk.update_model_set.assert_called_once()
 82 | 
 83 | 
 84 | @patch("generator.spoke.looker_sdk")
 85 | def test_generate_directories_no_sdk(looker_sdk, namespaces, tmp_path):
 86 |     sdk = looker_sdk.init40()
 87 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
 88 | 
 89 |     generate_directories(namespaces, tmp_path, False)
 90 |     dirs = list((tmp_path / "looker-spoke-default").iterdir())
 91 |     assert dirs == [tmp_path / "looker-spoke-default" / "glean-app"]
 92 | 
 93 |     app_path = tmp_path / "looker-spoke-default" / "glean-app"
 94 |     sub_dirs = set(app_path.iterdir())
 95 |     assert sub_dirs == {
 96 |         app_path / "views",
 97 |         app_path / "explores",
 98 |         app_path / "dashboards",
 99 |         app_path / "glean-app.model.lkml",
100 |     }
101 | 
102 |     assert (app_path / "dashboards" / ".gitkeep").exists()
103 | 
104 |     sdk.create_lookml_model.assert_not_called()
105 | 
106 | 
107 | @patch("generator.spoke.looker_sdk")
108 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
109 | def test_existing_dir(looker_sdk, namespaces, tmp_path):
110 |     sdk = looker_sdk.init40()
111 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
112 | 
113 |     generate_directories(namespaces, tmp_path, True)
114 |     tmp_file = tmp_path / "looker-spoke-default" / "glean-app" / "tmp-file"
115 |     tmp_file.write_text("hello, world")
116 | 
117 |     generate_directories(namespaces, tmp_path)
118 | 
119 |     # We shouldn't overwrite this dir
120 |     assert tmp_file.is_file()
121 | 
122 | 
123 | @patch("generator.spoke.looker_sdk")
124 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilla.cloud.looker.com"})
125 | def test_generate_model(looker_sdk, namespaces, tmp_path):
126 |     sdk = looker_sdk.init40()
127 |     sdk.search_model_sets.side_effect = [[Mock(models=["model"], id=1)]]
128 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
129 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
130 | 
131 |     write_model = Mock()
132 |     looker_sdk.models40.WriteModelSet.return_value = write_model
133 | 
134 |     generate_directories(namespaces, tmp_path, True)
135 |     expected_dict = {
136 |         "connection": "telemetry",
137 |         "label": "Glean App",
138 |         "includes": ["//looker-hub/glean-app/explores/*"],
139 |     }
140 | 
141 |     expected_text = """connection: "telemetry"
142 | label: "Glean App"
143 | # Include files from looker-hub or spoke-default below. For example:
144 | include: "//looker-hub/glean-app/explores/*"
145 | # include: "//looker-hub/glean-app/dashboards/*"
146 | # include: "views/*"
147 | # include: "explores/*"
148 | # include: "dashboards/*"
149 | """
150 |     actual_text = (
151 |         tmp_path / "looker-spoke-default" / "glean-app" / "glean-app.model.lkml"
152 |     ).read_text()
153 |     actual_dict = lkml.load(actual_text)
154 |     assert expected_text == actual_text
155 |     assert expected_dict == actual_dict
156 | 
157 |     looker_sdk.models40.WriteModelSet.assert_any_call(models=["model", "glean-app"])
158 |     assert looker_sdk.models40.WriteModelSet.call_count == 1
159 | 
160 |     sdk.update_model_set.assert_any_call(1, write_model)
161 | 
162 | 
163 | @patch("generator.spoke.looker_sdk")
164 | @patch.dict(os.environ, {"LOOKER_INSTANCE_URI": "https://mozilladev.cloud.looker.com"})
165 | def test_alternate_connection(looker_sdk, custom_namespaces, tmp_path):
166 |     sdk = looker_sdk.init40()
167 |     sdk.search_model_sets.return_value = [Mock(models=["model"], id=1)]
168 |     sdk.lookml_model.side_effect = _looker_sdk.error.SDKError("msg")
169 |     looker_sdk.error = Mock(SDKError=_looker_sdk.error.SDKError)
170 | 
171 |     write_model = Mock()
172 |     looker_sdk.models40.WriteLookmlModel.return_value = write_model
173 | 
174 |     generate_directories(custom_namespaces, tmp_path, True)
175 |     dirs = list((tmp_path / "looker-spoke-private").iterdir())
176 |     assert dirs == [tmp_path / "looker-spoke-private" / "custom"]
177 | 
178 |     app_path = tmp_path / "looker-spoke-private" / "custom"
179 |     sub_dirs = set(app_path.iterdir())
180 |     assert sub_dirs == {
181 |         app_path / "views",
182 |         app_path / "explores",
183 |         app_path / "dashboards",
184 |         app_path / "custom.model.lkml",
185 |     }
186 | 
187 |     expected_dict = {
188 |         "connection": "bigquery-oauth",
189 |         "label": "Custom",
190 |     }
191 |     expected_text = """connection: "bigquery-oauth"
192 | label: "Custom"
193 | # Include files from looker-hub or spoke-default below. For example:
194 | # include: "//looker-hub/custom/explores/*"
195 | # include: "//looker-hub/custom/dashboards/*"
196 | # include: "views/*"
197 | # include: "explores/*"
198 | # include: "dashboards/*"
199 | """
200 |     actual_text = (
201 |         tmp_path / "looker-spoke-private" / "custom" / "custom.model.lkml"
202 |     ).read_text()
203 |     actual_dict = lkml.load(actual_text)
204 |     print_and_test(expected_text, actual_text)
205 |     print_and_test(expected_dict, actual_dict)
206 | 
207 |     looker_sdk.models40.WriteLookmlModel.assert_called_with(
208 |         allowed_db_connection_names=["bigquery-oauth"],
209 |         name="custom",
210 |         project_name="spoke-private",
211 |     )
212 |     sdk.create_lookml_model.assert_called_with(write_model)
213 |     sdk.update_model_set.assert_called_once()
214 | 
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
  1 | """Utility functions for tests."""
  2 | 
  3 | import pprint
  4 | 
  5 | 
  6 | def get_differences(expected, result, path="", sep="."):
  7 |     """
  8 |     Get the differences between two JSON-like python objects.
  9 | 
 10 |     For complicated objects, this is a big improvement over pytest -vv.
11 | """ 12 | differences = [] 13 | 14 | if expected is not None and result is None: 15 | differences.append(("Expected exists but not Result", path)) 16 | if expected is None and result is not None: 17 | differences.append(("Result exists but not Expected", path)) 18 | if expected is None and result is None: 19 | return differences 20 | 21 | exp_is_dict, res_is_dict = isinstance(expected, dict), isinstance(result, dict) 22 | exp_is_list, res_is_list = isinstance(expected, list), isinstance(result, list) 23 | if exp_is_dict and not res_is_dict: 24 | differences.append(("Expected is dict but not Result", path)) 25 | elif res_is_dict and not exp_is_dict: 26 | differences.append(("Result is dict but not Expected", path)) 27 | elif not exp_is_dict and not res_is_dict: 28 | if exp_is_list and res_is_list: 29 | for i in range(max(len(expected), len(result))): 30 | if i >= len(result): 31 | differences.append( 32 | (f"Result missing element {expected[i]}", path + sep + str(i)) 33 | ) 34 | elif i >= len(expected): 35 | differences.append( 36 | ( 37 | f"Result contains extra element {result[i]}", 38 | path + sep + str(i), 39 | ) 40 | ) 41 | else: 42 | differences += get_differences( 43 | expected[i], result[i], path + sep + str(i) 44 | ) 45 | elif expected != result: 46 | differences.append((f"Expected={expected}, Result={result}", path)) 47 | else: 48 | exp_keys, res_keys = set(expected.keys()), set(result.keys()) 49 | in_exp_not_res, in_res_not_exp = exp_keys - res_keys, res_keys - exp_keys 50 | 51 | for k in in_exp_not_res: 52 | differences.append(("In Expected, not in Result", path + sep + k)) 53 | for k in in_res_not_exp: 54 | differences.append(("In Result, not in Expected", path + sep + k)) 55 | 56 | for k in exp_keys & res_keys: 57 | differences += get_differences(expected[k], result[k], path + sep + k) 58 | 59 | return differences 60 | 61 | 62 | def print_and_test(expected, result=None, actual=None): 63 | """Print objects and differences, then test equality.""" 64 | pp = pprint.PrettyPrinter(indent=2) 65 | if actual is not None: 66 | result = actual 67 | 68 | print("\nExpected:") 69 | pp.pprint(expected) 70 | 71 | print("\nActual:") 72 | pp.pprint(result) 73 | 74 | print("\nDifferences:") 75 | print("\n".join([" - ".join(v) for v in get_differences(expected, result)])) 76 | 77 | assert result == expected 78 | 79 | 80 | class MockDryRunContext: 81 | """Mock DryRunContext.""" 82 | 83 | def __init__( 84 | self, 85 | cls, 86 | use_cloud_function=False, 87 | id_token=None, 88 | credentials=None, 89 | ): 90 | """Initialize dry run instance.""" 91 | self.use_cloud_function = use_cloud_function 92 | self.id_token = id_token 93 | self.credentials = credentials 94 | self.cls = cls 95 | 96 | def create( 97 | self, 98 | sql=None, 99 | project="moz-fx-data-shared-prod", 100 | dataset=None, 101 | table=None, 102 | ): 103 | """Initialize passed MockDryRun instance.""" 104 | return self.cls( 105 | use_cloud_function=self.use_cloud_function, 106 | id_token=self.id_token, 107 | credentials=self.credentials, 108 | sql=sql, 109 | project=project, 110 | dataset=dataset, 111 | table=table, 112 | ) 113 | 114 | 115 | class MockDryRun: 116 | """Mock dryrun.DryRun.""" 117 | 118 | def __init__( 119 | self, 120 | use_cloud_function, 121 | id_token, 122 | credentials, 123 | sql=None, 124 | project=None, 125 | dataset=None, 126 | table=None, 127 | ): 128 | """Create MockDryRun instance.""" 129 | self.sql = sql 130 | self.project = project 131 | self.dataset = dataset 132 | self.table = table 133 | 
134 |         self.credentials = credentials
135 |         self.id_token = id_token
136 | 
--------------------------------------------------------------------------------
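
Usage sketch for the helpers in tests/utils.py (illustrative only, not a file in the repository; assumes the repository root is on the import path). get_differences walks two JSON-like objects and returns (message, path) pairs, and print_and_test pretty-prints both objects and their differences before asserting equality:

# Illustrative example; the input dicts below are made up.
from tests.utils import get_differences, print_and_test

expected = {"views": [{"name": "events", "type": "count"}]}
actual = {"views": [{"name": "events", "type": "count_distinct"}]}

# Each unequal leaf is reported together with its dotted path:
# [('Expected=count, Result=count_distinct', '.views.0.type')]
print(get_differences(expected, actual))

# Prints both objects plus the differences, then fails the equality assert.
print_and_test(expected, actual=actual)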
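The MockDryRunContext / MockDryRun pair mirrors the generator's dry-run interface: a view under test receives the context (as in test_events.py's events_view.to_lookml(None, dryrun=mock_dryrun)) and calls its create() factory to obtain a per-table dry run, which is then asked for the table schema. A minimal sketch of that wiring; the subclass name and canned schema here are hypothetical:

# Illustrative example; MockDryRunExample is not a class in the repository.
from tests.utils import MockDryRun, MockDryRunContext

class MockDryRunExample(MockDryRun):
    """Return a canned schema instead of dry-running a BigQuery query."""

    def get_table_schema(self):
        return [{"name": "client_id", "type": "STRING"}]

ctx = MockDryRunContext(MockDryRunExample, use_cloud_function=False)
dry_run = ctx.create(dataset="glean_app", table="baseline")
assert dry_run.project == "moz-fx-data-shared-prod"  # create()'s default project
assert dry_run.get_table_schema() == [{"name": "client_id", "type": "STRING"}]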