├── .circleci └── config.yml ├── .dockerignore ├── .flake8 ├── .gce_boto ├── .gcloudignore ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── glean.yaml │ └── update-fog.yml ├── .gitignore ├── .yamllint ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── conftest.py ├── docker-compose.yml ├── docs.png ├── docs └── common-failures.md ├── fog-updater ├── Dockerfile ├── README.md ├── action.yml ├── requirements.txt └── src │ ├── fog_update.py │ └── test_util.py ├── main.py ├── notebooks └── load_and_run.ipynb ├── probe_scraper ├── __init__.py ├── check_repositories.py ├── emailer.py ├── exc.py ├── fog_checks.py ├── glean_checks.py ├── glean_push.py ├── model_validation.py ├── parsers │ ├── __init__.py │ ├── events.py │ ├── histograms.py │ ├── metrics.py │ ├── pings.py │ ├── repositories.py │ ├── scalars.py │ ├── tags.py │ ├── third_party │ │ ├── __init__.py │ │ ├── histogram_tools.py │ │ ├── parse_events.py │ │ ├── parse_scalars.py │ │ ├── shared_telemetry_utils.py │ │ └── usecounters.py │ └── utils.py ├── ping_expiry_alert.py ├── probe_expiry_alert.py ├── remote_storage.py ├── runner.py ├── scrapers │ ├── __init__.py │ ├── buildhub.py │ ├── git_scraper.py │ └── moz_central_scraper.py ├── transform_probes.py └── transform_revisions.py ├── probeinfo_api.yaml ├── pytest.ini ├── repositories.yaml ├── requirements.txt ├── setup.py ├── test_requirements.txt └── tests ├── __init__.py ├── resources ├── Histograms.json ├── UseCounters.conf ├── metrics.yaml ├── nsDeprecatedOperationList.h ├── test_events.yaml ├── test_repo_files │ ├── duplicate │ │ └── 0 │ │ │ └── metrics.yaml │ ├── expired │ │ └── 0 │ │ │ └── metrics.yaml │ ├── improper │ │ └── 0 │ │ │ └── metrics.yaml │ └── normal │ │ ├── 0 │ │ └── metrics.yaml │ │ ├── 1 │ │ └── metrics.yaml │ │ └── 2 │ │ ├── metrics.yaml │ │ └── tags.yaml └── test_scalars.yaml ├── test_buildhub.py ├── test_event_parser.py ├── test_fog_checks.py ├── test_git_scraper.py ├── test_glean_checks.py ├── test_glean_limit_date.py ├── test_glean_push.py ├── test_histogram_parser.py ├── test_library_refs.py ├── test_metrics_parser.py ├── test_moz_central_scraper.py ├── test_ping_expiry_alert.py ├── test_probe_expiry_alert.py ├── test_repositories_parser.py ├── test_runner.py ├── test_scalar_parser.py └── test_transform_probes.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | version: 2 4 | jobs: 5 | build_and_test: 6 | machine: 7 | image: default 8 | working_directory: ~/mozilla/probe-scraper 9 | steps: 10 | - checkout 11 | - run: make build 12 | - run: make lint 13 | - run: make check-repos 14 | - run: make test 15 | - run: make burnham-dryrun 16 | 17 | deploy_docker: 18 | docker: &gcloud-image 19 | - image: gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0-alpine 20 | working_directory: ~/mozilla/probe-scraper 21 | steps: 22 | - checkout 23 | - setup_remote_docker: 24 | docker_layer_caching: true 25 | - run: 26 | name: Build container 27 | command: | 28 | docker build -t app:build . 
29 | - run: 30 | name: Configure gcloud 31 | command: | 32 | echo $GCLOUD_SERVICE_KEY | gcloud auth activate-service-account --key-file=- 33 | gcloud --quiet config set project ${GOOGLE_PROJECT_ID} 34 | gcloud --quiet config set compute/zone ${GOOGLE_COMPUTE_ZONE} 35 | gcloud auth configure-docker 36 | - run: 37 | name: Deploy to GCR 38 | command: | 39 | DOCKER_IMAGE="gcr.io/${GOOGLE_PROJECT_ID}/probe-scraper" 40 | # Deploy main 41 | if [ "${CIRCLE_BRANCH}" == main ]; then 42 | docker tag app:build "${DOCKER_IMAGE}:latest" 43 | docker push "${DOCKER_IMAGE}:latest" 44 | elif [ ! -z "${CIRCLE_TAG}" ]; then 45 | # Deploy a release tag... 46 | echo "${DOCKER_IMAGE}:${CIRCLE_TAG}" 47 | docker tag app:build "${DOCKER_IMAGE}:${CIRCLE_TAG}" 48 | docker images 49 | docker push "${DOCKER_IMAGE}:${CIRCLE_TAG}" 50 | fi 51 | 52 | deploy_cloud_function: 53 | docker: *gcloud-image 54 | steps: 55 | - checkout 56 | - run: 57 | name: Install jq 58 | command: apk add jq 59 | - run: 60 | name: Activate Credentials 61 | command: | 62 | KEY="$(echo "$GCLOUD_SERVICE_KEY_PROD_B64" | base64 -d)" 63 | echo "$KEY" | gcloud --quiet auth activate-service-account --key-file=- 64 | gcloud --quiet config set project "$(echo "$KEY" | jq -r .project_id)" 65 | - run: 66 | # `--source=.` in the command below is a workaround for deployment issues 67 | # See DENG-3665 68 | name: Deploy Google Cloud Function 69 | command: > 70 | gcloud functions deploy glean-push 71 | --region=us-west1 72 | --allow-unauthenticated 73 | --entry-point=glean_push 74 | --memory=2048 75 | --runtime=python310 76 | --set-env-vars=BOTO_PATH=.gce_boto,OUTPUT_BUCKET=gs://probe-scraper-prod-artifacts/ 77 | --trigger-http 78 | --service-account=$PROD_SERVICE_ACCOUNT_INVOKER 79 | --timeout=540s 80 | --source=. 81 | 82 | docs_build: 83 | docker: 84 | - image: cimg/node:lts 85 | steps: 86 | - checkout 87 | - run: 88 | name: Install redoc 89 | command: | 90 | npm install @redocly/cli 91 | - run: 92 | name: Build docs 93 | command: | 94 | npx @redocly/cli build-docs probeinfo_api.yaml -o docs/index.html \ 95 | --theme.openapi.expandResponses="200,201" \ 96 | --theme.openapi.jsonSampleExpandLevel=2 97 | - persist_to_workspace: 98 | root: docs 99 | paths: index.html 100 | docs_deploy: 101 | docker: 102 | - image: cimg/node:lts 103 | steps: 104 | - checkout 105 | - attach_workspace: 106 | at: docs/ 107 | - run: 108 | name: Disable jekyll builds 109 | command: touch docs/.nojekyll 110 | - run: 111 | name: Install and configure dependencies 112 | command: | 113 | npm install gh-pages@2.0.1 114 | git config user.email "ci-build@mozilla.com" 115 | git config user.name "ci-build" 116 | - add_ssh_keys: 117 | fingerprints: 118 | - "37:cd:ad:cf:75:1f:96:9f:9b:ce:e0:6c:b4:09:26:4d" 119 | - run: 120 | name: Deploy docs to gh-pages branch 121 | command: npx gh-pages --dotfiles --message "[skip ci] Updates" --dist docs/ 122 | 123 | workflows: 124 | version: 2 125 | build-test-deploy: 126 | jobs: 127 | - build_and_test: 128 | filters: 129 | tags: 130 | only: /.*/ 131 | - docs_build 132 | - docs_deploy: 133 | requires: 134 | - docs_build 135 | filters: 136 | branches: 137 | only: main 138 | - deploy_docker: 139 | context: data-eng-airflow-gcr 140 | requires: 141 | - build_and_test 142 | filters: 143 | tags: 144 | only: /.*/ 145 | branches: 146 | only: main 147 | - deploy_cloud_function: 148 | context: probe-scraper 149 | requires: 150 | - build_and_test 151 | filters: 152 | branches: 153 | only: main 154 | 
-------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # idea project settings 86 | .idea/ 87 | *.iml 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # Cache files 96 | probe_scraper_errors_cache.json 97 | probe_scraper_cache.sqlite 98 | 99 | # Temp files. 100 | temp/ 101 | _tmp/ 102 | _out/ 103 | _temp/ 104 | 105 | *.json 106 | !tests/resources/*.json 107 | !schemas/*.json 108 | 109 | # Generated files 110 | index.html 111 | emails.txt 112 | .repositories.yaml 113 | 114 | # Ignore .DS_Store 115 | .DS_Store 116 | 117 | # A convenient place for the cache 118 | .scraper_cache/ 119 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # See http://pep8.readthedocs.io/en/latest/intro.html#configuration 3 | max-line-length = 100 4 | filename = *.py, +.lint 5 | exclude = probe_scraper/parsers/third_party/* venv 6 | -------------------------------------------------------------------------------- /.gce_boto: -------------------------------------------------------------------------------- 1 | [GoogleCompute] 2 | service_account = default 3 | 4 | [GSUtil] 5 | state_dir = /tmp/gsutil 6 | -------------------------------------------------------------------------------- /.gcloudignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # idea project settings 88 | .idea/ 89 | *.iml 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # Cache files 98 | probe_scraper_errors_cache.json 99 | probe_scraper_cache.sqlite 100 | 101 | # Temp files. 102 | temp/ 103 | _tmp/ 104 | _out/ 105 | _temp/ 106 | 107 | *.json 108 | !tests/resources/*.json 109 | !schemas/*.json 110 | 111 | # Generated files 112 | index.html 113 | emails.txt 114 | .repositories.yaml 115 | 116 | # Ignore .DS_Store 117 | .DS_Store 118 | 119 | # A convenient place for the cache 120 | .scraper_cache/ 121 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Adding/changing the data in repositories.yaml can have large downstream 2 | # effects, so we're a little stricter on who can sign off on changes here. 3 | repositories.yaml @chutten @akkomar @whd @mikaeld @dexterp37 @badboy @travis79 4 | 5 | # The exclusion list in git_scraper.py can cause similar problems (see e.g. 
6 | # https://bugzilla.mozilla.org/show_bug.cgi?id=1745771)
7 | probe_scraper/scrapers/git_scraper.py @chutten @akkomar @whd @mikaeld @dexterp37 @badboy @travis79
8 | 
9 | fog_updater/* @chutten @badboy
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   # Auto-update to next glean-parser major version
4 |   - package-ecosystem: "pip"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "daily"
8 |     reviewers:
9 |       - "mozilla/glean"
10 |     versioning-strategy: increase-if-necessary
11 |     allow:
12 |       - dependency-name: "glean-parser"
--------------------------------------------------------------------------------
/.github/workflows/glean.yaml:
--------------------------------------------------------------------------------
1 | name: Glean probe-scraper
2 | 
3 | on:
4 |   workflow_call:
5 | 
6 | jobs:
7 |   probe-scraper:
8 |     name: Glean probe-scraper
9 |     runs-on: ubuntu-22.04
10 |     steps:
11 |       - name: Validate Glean metrics via probe-scraper, and if appropriate publish changes
12 |         run: |-
13 |           curl --fail-with-body https://us-west1-moz-fx-data-probe-s-prod-2bc3.cloudfunctions.net/glean-push --data '{
14 |             "url": "${{github.server_url}}/${{github.repository}}",
15 |             "commit":"${{github.sha}}",
16 |             "branch":"${{github.ref_name}}"
17 |           }'
18 | 
--------------------------------------------------------------------------------
/.github/workflows/update-fog.yml:
--------------------------------------------------------------------------------
1 | # This Source Code Form is subject to the terms of the Mozilla Public
2 | # License, v. 2.0. If a copy of the MPL was not distributed with this
3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/
4 | 
5 | # This workflow periodically calls the fog-update-bot action to update
6 | # the list of FOG metrics.yaml and ping.yaml files from its metrics_index.py
7 | 
8 | 
9 | name: "Update FOG"
10 | 
11 | permissions:
12 |   contents: write
13 |   pull-requests: write
14 | 
15 | on:
16 |   schedule:
17 |     # 04:20 UTC - every morning
18 |     # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule
19 |     - cron: '20 4 * * *'
20 |   workflow_dispatch:
21 | 
22 | jobs:
23 |   main:
24 |     name: "Update FOG"
25 |     runs-on: ubuntu-22.04
26 |     steps:
27 |       - name: Checkout
28 |         uses: actions/checkout@v3
29 |       - name: "Update FOG"
30 |         uses: ./fog-updater
31 |         id: fog-updater
32 |         if: github.repository == 'mozilla/probe-scraper'
33 |         env:
34 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # idea project settings 86 | .idea/ 87 | *.iml 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # Cache files 96 | probe_scraper_errors_cache.json 97 | probe_scraper_cache.sqlite 98 | 99 | # Temp files. 100 | temp/ 101 | _tmp/ 102 | _out/ 103 | _temp/ 104 | 105 | *.json 106 | !tests/resources/*.json 107 | !schemas/*.json 108 | 109 | # Generated files 110 | index.html 111 | emails.txt 112 | .repositories.yaml 113 | 114 | # Ignore .DS_Store 115 | .DS_Store 116 | 117 | # A convenient place for the cache 118 | .scraper_cache/ 119 | 120 | # JavaScript tooling 121 | node_modules/ 122 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | extends: default 2 | 3 | rules: 4 | line-length: 5 | allow-non-breakable-words: true 6 | allow-non-breakable-inline-mappings: true 7 | ignore: | 8 | .circleci/config.yml 9 | probeinfo_api.yaml 10 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Community Participation Guidelines 2 | 3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines. 4 | For more details, please read the 5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 6 | 7 | ## How to Report 8 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. 9 | 10 | 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thank you for your interest in contributing to probe-scraper! 2 | This document tries to codify some best practices for contribution to this 3 | repository. 4 | 5 | ## Participation guidelines 6 | 7 | All communication is expected to follow the 8 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 9 | For more information, see the [code of conduct document](./CODE_OF_CONDUCT.md) 10 | in the root of this repository. 11 | 12 | ## Filing issues 13 | 14 | File an issue if you have a bug report or feature request that you (personally) 15 | do not intend to work on right away _or_ you would like additional feedback on 16 | your approach before starting implementation work. 
If you found a bug (or small
17 | missing feature) and you want to start implementing it immediately (or already
18 | have a solution), go ahead and skip straight to making a pull request (see
19 | below).
20 | 
21 | To help with triage, issues should have a descriptive title. Examples of good
22 | issue titles:
23 | 
24 | - Require channels be unique for applications
25 | - "Telemetry Probe Expiry" emails sometimes don't include list of filed bugs
26 | 
27 | In the issue itself, provide as much information as necessary to help someone
28 | reading it understand the nature of the problem (and provide feedback). For
29 | examples of this, look at some of the
30 | [fixed issues](https://github.com/mozilla/probe-scraper/issues?q=is%3Aissue+is%3Aclosed)
31 | filed by the project maintainers.
32 | 
33 | Occasionally, probe-scraper bugs are tracked inside Bugzilla, especially for issues
34 | which might affect other parts of the pipeline.
35 | 
36 | ## Opening pull requests
37 | 
38 | Like issues, pull requests should have a descriptive title to help with triage.
39 | However, there are two things that are different:
40 | 
41 | - Instead of pointing out a problem, they should describe the solution
42 | - If a pull request fixes a specific issue, the title should specify
43 |   `(fixes #X)` (where X refers to the issue number)
44 | 
45 | For example, a pull request to fix an issue entitled `"Telemetry Probe Expiry" emails sometimes don't include list of filed bugs` could be named `Include list of filed bugs in "Telemetry Probe Expiry" emails (fixes #1234)`.
46 | 
47 | When a pull request fixes a bug in Bugzilla, prepend the bug number to the title with
48 | the keyword `Bug ` in the format `Bug XXXX - `.
49 | This allows the [Bugzilla PR Linker] to link to this PR automatically in Bugzilla.
50 | For example, `Bug 1234 - Include list of filed bugs in "Telemetry Probe Expiry" emails`.
51 | 
52 | As much as possible, each pull request should attempt to solve _one problem_.
53 | For logically separate changes, file multiple PRs.
54 | 
55 | Make sure that the pull request passes continuous integration (including linter
56 | checks) and that there are no merge conflicts before asking for review. If you
57 | want some feedback on a work-in-progress (where these conditions are not yet
58 | met), mark your pull request as a draft.
59 | 
60 | [bugzilla pr linker]: https://github.com/mozilla/github-bugzilla-pr-linker
61 | 
62 | ## Dangerous changes
63 | 
64 | This repository is central to the ingestion and processing of Telemetry data at
65 | Mozilla.
66 | Changes made to probe-scraper can have large downstream consequences, such as unwanted changes to our BigQuery tables.
67 | In particular, adding new Glean repositories (`repositories.yaml` at the root
68 | of this repository) needs to be done with care.
69 | 
70 | Things to bear in mind:
71 | 
72 | - Once probe scraper has successfully run, there is no changing or rewriting history of the metrics files, as this will cause problems downstream with [mozilla-schema-generator].
73 | - There is currently no provision for deleting a repository once added (see [bug 1747811]).
74 | 
75 | As such, testing of works in progress should happen locally with a probe-scraper checkout (see the "dry run" instructions in the README) and/or evaluating test pings via the [Glean Debug Ping Viewer].
76 | Under no circumstances should you add a testing application to "see what happens".
77 | If you only want part of the history of a repository processed by probe-scraper, you can set a "start 78 | date" in `probe_scraper/scrapers/git_scraper.py` _before_ the first successful run of probe-scraper 79 | against it (i.e. the changes to `git_scraper.py` and `repositories.yaml` should land as a unit). 80 | 81 | To try and prevent incidents from occurring, changes to these files must go through people who have extensive 82 | experience debugging and reasoning about the schema generation portions of the data pipeline, documented in `.github/CODEOWNERS`. 83 | If you submit a pull request, these people will automatically be flagged for review. 84 | 85 | [mozilla-schema-generator]: https://github.com/mozilla/mozilla-schema-generator 86 | [bug 1747811]: https://bugzilla.mozilla.org/show_bug.cgi?id=1747811 87 | [glean debug ping viewer]: https://mozilla.github.io/glean/book/user/debugging/index.html#glean-debug-view 88 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | 5 | ARG APP_NAME=probe-scraper 6 | ENV APP_NAME=${APP_NAME} 7 | 8 | # Guidelines here: https://github.com/mozilla-services/Dockerflow/blob/master/docs/building-container.md 9 | ARG USER_ID="10001" 10 | ARG GROUP_ID="app" 11 | ARG HOME="/app" 12 | 13 | ENV HOME=${HOME} 14 | RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \ 15 | useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /app ${GROUP_ID} 16 | 17 | # List packages here 18 | RUN apt-get update && \ 19 | apt-get install -y --no-install-recommends \ 20 | file \ 21 | gcc \ 22 | libwww-perl && \ 23 | apt-get autoremove -y && \ 24 | apt-get clean 25 | 26 | # Upgrade pip 27 | RUN pip install --upgrade pip 28 | # Ensure setuptools is new enough, to avoid issues with wheels 29 | RUN pip install 'setuptools>=70.1' 30 | 31 | WORKDIR ${HOME} 32 | 33 | COPY requirements.txt ${HOME}/ 34 | RUN pip install -r requirements.txt 35 | 36 | COPY test_requirements.txt ${HOME}/ 37 | RUN pip install -r test_requirements.txt 38 | 39 | COPY . ${HOME} 40 | RUN pip install . 41 | 42 | # Drop root and change ownership of the application folder to the user 43 | RUN chown -R ${USER_ID}:${GROUP_ID} ${HOME} 44 | USER ${USER_ID} 45 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean lint test build docker-rm shell run stop apidoc 2 | 3 | help: 4 | @echo " apidoc Render the API documentation locally to index.html" 5 | @echo " clean Remove build artifacts" 6 | @echo " check-repos Verify all repositories in repositories.yaml are scrapable" 7 | @echo " lint Check style with flake8" 8 | @echo " format Format code with black and isort" 9 | @echo " test Run tests quickly with the default Python" 10 | @echo " build Builds the docker images for the docker-compose setup" 11 | @echo " docker-rm Stops and removes all docker containers" 12 | @echo " shell Opens a Bash shell" 13 | @echo " run Run a command. Can run scripts, e.g. make run COMMAND=\"./scripts/schema_generator.sh\"" 14 | @echo " stop Stop docker compose" 15 | 16 | clean: clean-build clean-pyc docker-rm 17 | 18 | clean-build: 19 | rm -fr build/ 20 | rm -fr dist/ 21 | rm -fr *.egg-info 22 | 23 | clean-pyc: 24 | find . -name '*.pyc' -exec rm -f {} + 25 | find . -name '*.pyo' -exec rm -f {} + 26 | find . 
-name '*~' -exec rm -f {} + 27 | 28 | apidoc: 29 | # Keep in sync with doc task in .circleci/config.yml 30 | docker run --rm \ 31 | -v ${PWD}:/local \ 32 | cimg/node:lts \ 33 | sh -c "npm install @redocly/cli; npx @redocly/cli build-docs /local/probeinfo_api.yaml -o /local/index.html --theme.openapi.expandResponses='200,201' --theme.openapi.jsonSampleExpandLevel=2" 34 | 35 | format: 36 | python3 -m black probe_scraper tests ./*.py 37 | python3 -m isort --profile black probe_scraper tests ./*.py 38 | 39 | lint: build 40 | docker-compose run app flake8 . 41 | docker-compose run app yamllint repositories.yaml .circleci 42 | docker-compose run app python -m black --check probe_scraper tests ./*.py 43 | docker-compose run app python -m isort --profile black --check-only probe_scraper tests ./*.py 44 | 45 | check-repos: 46 | docker-compose run app python -m probe_scraper.check_repositories 47 | 48 | test: build 49 | docker-compose run app pytest tests/ --run-web-tests 50 | 51 | # For this test, we scrape glean-core and burnham. 52 | # Even though burnham is deprecated, it should still be valid to be scraped 53 | # See also mozilla/probe-scraper#283. 54 | # We set a limit date due to more strict parsing. 55 | # glean-core's metrics.yaml prior to 2023-10-20 cannot be parsed with a modern glean-parser. 56 | burnham-dryrun: 57 | docker-compose run app python -m probe_scraper.runner --glean --glean-repo glean-core --glean-repo glean-android --glean-repo burnham --glean-limit-date 2023-10-20 --dry-run 58 | 59 | build: 60 | docker-compose build 61 | 62 | docker-rm: stop 63 | docker-compose rm -f 64 | 65 | shell: 66 | docker-compose run --entrypoint "/bin/bash" app 67 | 68 | run: build 69 | docker-compose run app $(COMMAND) 70 | 71 | stop: 72 | docker-compose down 73 | docker-compose stop 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # probe-scraper 2 | Scrape Telemetry probe data from Firefox repositories. 3 | 4 | This extracts per-version Telemetry probe data for Firefox and other Mozilla products from registry files like Histograms.json and Scalars.yaml. 5 | The data allows answering questions like "which Firefox versions is this Telemetry probe in anyway?". 6 | Also, probes outside of Histograms.json - like the CSS use counters - are included in the output data. 7 | 8 | The data is pulled from two different sources: 9 | - From [`hg.mozilla.org`](https://hg.mozilla.org) for Firefox data. 10 | - From a [configurable set of Github repositories](repositories.yaml) that use [Glean](https://github.com/mozilla-mobile/android-components/tree/master/components/service/glean). 11 | 12 | Probe Scraper outputs JSON to https://probeinfo.telemetry.mozilla.org. 13 | Effectively, this creates a REST API which can be used by downstream tools like 14 | [mozilla-schema-generator](https://github.com/mozilla/mozilla-schema-generator) 15 | and various data dictionary type applications (see below). 16 | 17 | An [OpenAPI reference](https://mozilla.github.io/probe-scraper/) to this API is available: 18 | 19 | ![probeinfo API docs](docs.png) 20 | 21 | A web tool to explore the Firefox-related data is available at [probes.telemetry.mozilla.org](https://probes.telemetry.mozilla.org/). A project to develop a similar view for Glean-based data 22 | is under development in the [Glean Dictionary](https://github.com/mozilla/glean-dictionary). 
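
For example, a downstream consumer can read the published probe data directly over HTTPS. The snippet below is a minimal sketch: it uses the Firefox main-ping endpoint documented under "Accessing the data files" below, and assumes only that the endpoint returns a JSON object keyed by probe name.

```python
import requests

# Fetch all Firefox main-ping probe definitions from the probeinfo API.
url = "https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes"
probes = requests.get(url).json()
print(f"{len(probes)} probes published")
```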
23 | 24 | ## Deprecation 25 | 26 | Deprecation is an important step in an application lifecycle. Because of the backwards-compatible nature of our pipeline, we do not 27 | remove Glean apps or variants from the `repositories.yaml` file - instead, we mark them as deprecated. 28 | 29 | ### Marking an App Variant as deprecated 30 | 31 | When an app variant is marked as deprecated (see this [example from Fenix](https://github.com/mozilla/probe-scraper/blob/213055f967b4903933667002ec376cd69cdf5a77/repositories.yaml#L415-L431)), the following happens: 32 | - It shows as `[Deprecated]` in the Glean Dictionary, in the `Access` section (see e.g. [Fenix's client_id metric](https://dictionary.telemetry.mozilla.org/apps/fenix/metrics/client_id)). 33 | 34 | ### Marking an App as deprecated 35 | 36 | When an app is marked as deprecated (see this [example of Firefox for Fire TV](https://github.com/mozilla/probe-scraper/blob/213055f967b4903933667002ec376cd69cdf5a77/repositories.yaml#L501-L504)), the following happens: 37 | - It no longer shows by default in the Glean Dictionary. (Deprecated apps can be viewed by clicking the `Show deprecated applications` checkbox) 38 | 39 | ## Adding a New Glean Repository 40 | 41 | To scrape a git repository for probe definitions, an entry needs to be added in `repositories.yaml`. 42 | The exact format of the entry depends on whether you are adding an application or a library. See below for details. 43 | 44 | ### Adding an application 45 | 46 | For a given application, Glean metrics are emitted by the application itself, any libraries it uses 47 | that also use Glean, as well as the Glean library proper. Therefore, probe scraper needs a way to 48 | find all of the dependencies to determine all of the metrics emitted by 49 | that application. 50 | 51 | Therefore, each application should specify a `dependencies` parameter, which is a list of Glean-using libraries used by the application. Each entry should be a library name as specified by the library's `library_names` parameter. 52 | 53 | For Android applications, if you're not sure what the dependencies of the application are, you can run the following command at the root of the project folder: 54 | 55 | ```bash 56 | $ ./gradlew :app:dependencies 57 | ``` 58 | 59 | See the full [application schema documentation](https://mozilla.github.io/probe-scraper/#tag/application) 60 | for descriptions of all the available parameters. 61 | 62 | ### Adding a library 63 | 64 | Probe scraper also needs a way to map dependencies back to an entry in the 65 | `repositories.yaml` file. Therefore, any libraries defined should also include 66 | their build-system-specific library names in the `library_names` parameter. 67 | 68 | See the full [library schema documentation](https://mozilla.github.io/probe-scraper/#tag/library) 69 | for descriptions of all the available parameters. 70 | 71 | ## Developing the probe-scraper 72 | 73 | You can choose to develop using the container, or locally. Using the container will be slower, since changes will trigger a rebuild of the container. 74 | But using the container method will ensure that your PR passes CircleCI build/test phases. 75 | 76 | ### Local development 77 | 78 | You may wish to, 79 | instead of installing all these requirements in your global Python environment, 80 | start by generating and activating a 81 | [Python virtual environment](https://docs.python.org/3/library/venv.html). 82 | The `.gitignore` expects it to be called `ENV` or `venv`: 83 | ```console 84 | python -m venv venv 85 | . 
venv/bin/activate
86 | ```
87 | 
88 | Install the requirements:
89 | ```
90 | pip install -r requirements.txt
91 | pip install -r test_requirements.txt
92 | python setup.py develop
93 | ```
94 | 
95 | Run tests. This by default does not run tests that require a web connection:
96 | ```
97 | pytest tests/
98 | ```
99 | 
100 | To run all tests, including those that require a web connection:
101 | ```
102 | pytest tests/ --run-web-tests
103 | ```
104 | 
105 | To test whether the code conforms to the style rules, you can run:
106 | ```
107 | python -m black --check probe_scraper tests ./*.py
108 | flake8 --max-line-length 100 probe_scraper tests ./*.py
109 | yamllint repositories.yaml .circleci
110 | python -m isort --profile black --check-only probe_scraper tests ./*.py
111 | ```
112 | 
113 | To render API documentation locally to `index.html`:
114 | ```
115 | make apidoc
116 | ```
117 | 
118 | ### Developing using the container
119 | 
120 | Run tests in container. This does not run tests that require a web connection:
121 | ```
122 | export COMMAND='pytest tests/'
123 | make run
124 | ```
125 | 
126 | To run all tests, including those that require a web connection:
127 | ```
128 | make test
129 | ```
130 | 
131 | To test whether the code conforms to the style rules, you can run:
132 | ```
133 | make lint
134 | ```
135 | 
136 | ### Tests with Web Dependencies
137 | 
138 | Any tests that require a web connection to run should be marked with `@pytest.mark.web_dependency`.
139 | 
140 | These will not run by default, but will run on CI.
141 | 
142 | ### Performing a Dry-Run
143 | 
144 | Before opening a PR, it's good to test the code you wrote on the production data. You can specify a particular Firefox
145 | version to run on by using `--firefox-version`:
146 | ```
147 | export COMMAND='python -m probe_scraper.runner --firefox-version 65 --dry-run'
148 | make run
149 | ```
150 | or locally via:
151 | ```
152 | python -m probe_scraper.runner --firefox-version 65 --dry-run
153 | ```
154 | 
155 | Including `--dry-run` means emails will not be sent.
156 | 
157 | Additionally, you can test just on Glean repositories:
158 | ```
159 | export COMMAND='python -m probe_scraper.runner --glean --dry-run'
160 | make run
161 | ```
162 | 
163 | By default that will test against every Glean repository, which might take a while. If you want to test against just one (e.g. a new repository you're adding), you can use the `--glean-repo` argument to just test the repositories you care about:
164 | ```
165 | export COMMAND='python -m probe_scraper.runner --glean --glean-repo glean-core --glean-repo glean-android --glean-repo burnham --dry-run'
166 | make run
167 | ```
168 | 
169 | Replace burnham in the example above with your repository and its dependencies.
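
If you are adding a new entry to `repositories.yaml`, it is also worth running the repository check that CI runs via `make check-repos`, which verifies that every repository listed there is scrapable:
```
export COMMAND='python -m probe_scraper.check_repositories'
make run
```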
170 | 
171 | You can also do the dry-run locally:
172 | 
173 | ```
174 | python -m probe_scraper.runner --glean --glean-repo glean-core --glean-repo glean-android --glean-repo burnham --dry-run
175 | ```
176 | 
177 | ## Module overview
178 | 
179 | The module is built around the following data flow:
180 | 
181 | - scrape registry files from mozilla-central, clone files from repositories directory
182 | - extract probe data from the files
183 | - transform probe data into output formats
184 | - save to disk
185 | 
186 | The code layout consists mainly of:
187 | 
188 | - `probe_scraper`
189 |   - `runner.py` - the central script, ties the other pieces together
190 |   - `scrapers`
191 |     - `buildhub.py` - pull build info from the [BuildHub](https://buildhub.moz.tools) service
192 |     - `moz_central_scraper.py` - loads probe registry files for multiple versions from mozilla-central
193 |     - `git_scraper.py` - loads probe registry files from a git repository (no version or channel support yet, just per-commit)
194 |   - `parsers/` - extract probe data from the registry files
195 |     - `third_party` - these are imported parser scripts from [mozilla-central](https://dxr.mozilla.org/mozilla-central/source/toolkit/components/telemetry/)
196 |   - `transform_*.py` - transform the extracted raw data into output formats
197 | - `tests/` - the unit tests
198 | 
199 | ## Accessing the data files
200 | The processed probe data is serialized to disk in a directory hierarchy starting from the provided output directory. The directory layout resembles a REST-friendly structure.
201 | 
202 |     |-- product
203 |         |-- general
204 |         |-- revisions
205 |         |-- channel (or "all")
206 |             |-- ping type
207 |                 |-- probe type (or "all_probes")
208 | 
209 | For example, all the JSON probe data in the [main ping]() for the *Firefox Nightly* channel can be accessed with the following path: `firefox/nightly/main/all_probes`. The probe data for all the channels (same product and ping) can be accessed instead using `firefox/all/main/all_probes`.
210 | 
211 | The root directory for the output generated from the scheduled job can be found at <https://probeinfo.telemetry.mozilla.org/>.
212 | All the probe data for Firefox coming from the main ping can be found at <https://probeinfo.telemetry.mozilla.org/firefox/all/main/all_probes>.
213 | 
214 | ## Accessing `Glean` metrics data
215 | Glean data is generally laid out as follows:
216 | 
217 | ```
218 | | -- glean
219 |     | -- repositories
220 |     | -- general
221 |     | -- repository-name
222 |         | -- general
223 |         | -- metrics
224 | ```
225 | 
226 | For example, the data for a repository called `fenix` would be found at [`/glean/fenix/metrics`](https://probeinfo.telemetry.mozilla.org/glean/fenix/metrics). The time the data was last updated for that project can be found at [`glean/fenix/general`](https://probeinfo.telemetry.mozilla.org/glean/fenix/general).
227 | 
228 | A list of available repositories is at [`/glean/repositories`](https://probeinfo.telemetry.mozilla.org/glean/repositories).
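
As an illustration of consuming these endpoints, the sketch below lists the available Glean repositories and prints the metric names recorded for one of them. It is a minimal example: it assumes each entry returned by `/glean/repositories` carries a `name` field and that the metrics payload is a JSON object keyed by metric name.

```python
import requests

BASE_URL = "https://probeinfo.telemetry.mozilla.org"

# List the available Glean repositories.
repos = requests.get(f"{BASE_URL}/glean/repositories").json()
repo_name = repos[0]["name"]  # assumption: each entry has a "name" field

# Fetch the metrics recorded for that repository and print their names.
metrics = requests.get(f"{BASE_URL}/glean/{repo_name}/metrics").json()
for metric_name in sorted(metrics):
    print(metric_name)
```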
229 | 
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | 
4 | def pytest_addoption(parser):
5 |     parser.addoption(
6 |         "--run-web-tests",
7 |         action="store_true",
8 |         default=False,
9 |         help="Run tests that require a web connection",
10 |     )
11 | 
12 | 
13 | def pytest_collection_modifyitems(config, items):
14 |     if config.getoption("--run-web-tests"):
15 |         return
16 |     skip_web = pytest.mark.skip(reason="Need --run-web-tests option to run")
17 |     for item in items:
18 |         if "web_dependency" in item.keywords:
19 |             item.add_marker(skip_web)
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 |   app:
3 |     build:
4 |       context: .
5 |       dockerfile: Dockerfile
6 |     restart: "no"
7 |     command: "true"
--------------------------------------------------------------------------------
/docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/docs.png
--------------------------------------------------------------------------------
/docs/common-failures.md:
--------------------------------------------------------------------------------
1 | # Common failures of probe-scraper runs and how to solve them
2 | 
3 | `probe-scraper` runs every weekday in pull mode for some repositories, such as `mozilla-central`.
4 | It looks at all commits changing metric and ping definition files (`metrics.yaml`, `pings.yaml`).
5 | This can fail for a variety of reasons.
6 | 
7 | ## Backouts
8 | 
9 | Commits adding new metric or ping files get backed out, thus removing the file again.
10 | 
11 | ### Solution
12 | 
13 | Add the offending commits to the `SKIP_COMMITS` list of the product in [`probe_scraper/scrapers/git_scraper.py`][skipcommits].
14 | 
15 | 
16 | [skipcommits]: https://github.com/mozilla/probe-scraper/blob/1d23fcf4d041ea7fdf3e2c0c79252151f472ad0b/probe_scraper/scrapers/git_scraper.py
17 | 
18 | 
19 | ## Invalid metric definition files
20 | 
21 | A new commit changes a `metrics.yaml` file in a way that fails to parse.
22 | That is fixed in a subsequent commit.
23 | 
24 | ### Solution
25 | 
26 | Add the offending commit(s) to `SKIP_COMMITS` as above for [Backouts](#backouts).
27 | 
28 | ## Invalid metric definition files in the past
29 | 
30 | A `metrics.yaml` is already available in old commits in a project, but invalid.
31 | At some point later the file is fixed and correct.
32 | 
33 | ### Solution
34 | 
35 | Add a minimal date from which to start parsing the file in `MIN_DATES` in [`probe-scraper/probe_scraper/scrapers/git_scraper.py`][mindates].
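
For illustration, an entry in `MIN_DATES` maps a repository name (as it appears in `repositories.yaml`) to the first date from which its files should be parsed. The sketch below is hypothetical: the repository name and date are placeholders, and the exact shape should be checked against the current `git_scraper.py`. (`SKIP_COMMITS` above is similar, mapping a repository to a list of commit hashes to skip.)

```python
# Hypothetical sketch: parse metric files for "my-repo" only from this
# date onward, because earlier revisions of its metrics.yaml were invalid.
MIN_DATES = {
    "my-repo": "2021-01-01",
}
```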
36 | 
37 | [mindates]: https://github.com/mozilla/probe-scraper/blob/1d23fcf4d041ea7fdf3e2c0c79252151f472ad0b/probe_scraper/scrapers/git_scraper.py#L29
--------------------------------------------------------------------------------
/fog-updater/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10
2 | 
3 | WORKDIR /usr/src/app
4 | 
5 | COPY requirements.txt ./
6 | RUN pip install --no-cache-dir -r requirements.txt
7 | 
8 | COPY src/* ./
9 | 
10 | ENTRYPOINT ["/usr/src/app/fog_update.py"]
--------------------------------------------------------------------------------
/fog-updater/README.md:
--------------------------------------------------------------------------------
1 | # fog-update-bot
2 | 
3 | Automation to update `repositories.yaml` of `probe-scraper` with the latest `metrics_index.py` list.
4 | 
5 | Fetches and parses the `metrics_index.py` from `mozilla-firefox/firefox`, extracts the relevant list of YAML files
6 | and creates a new Pull Request against `probe-scraper` if it contains any changes.
7 | 
8 | ## Environment variables
9 | 
10 | | Name | Description |
11 | | ---- | ----------- |
12 | | `DEBUG` | If set enables debug logging |
13 | | `DRY_RUN` | If set to `True` will not create a PR |
14 | | `GITHUB_REPOSITORY_OWNER` | The owner of the `probe-scraper` repository |
15 | | `AUTHOR_NAME` | The name to use for the commit |
16 | | `AUTHOR_EMAIL` | The email to use for the commit |
17 | 
18 | ## Running with Docker
19 | 
20 | ```
21 | $ docker build -t fog-update .
22 | $ docker run -it --rm fog-update
23 | ```
24 | 
25 | ## Development
26 | 
27 | ```
28 | $ python3 -m venv env
29 | $ pip install -r requirements.txt
30 | $ pip install pytest
31 | ```
32 | 
33 | ## Testing
34 | 
35 | You can run the tests:
36 | 
37 | ```
38 | pytest
39 | ```
40 | 
41 | Manual runs of the updater require a `GITHUB_TOKEN`.
42 | Go to <https://github.com/settings/tokens> and create a new token (no additional scopes necessary).
43 | Set it in your shell:
44 | 
45 | ```
46 | export GITHUB_TOKEN=<token>
47 | ```
48 | 
49 | ## Code of Conduct
50 | 
51 | This repository is governed by Mozilla's code of conduct and etiquette guidelines.
52 | For more details, please read the
53 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
54 | 
55 | See [CODE_OF_CONDUCT.md](../CODE_OF_CONDUCT.md)
56 | 
57 | ## License
58 | 
59 | This Source Code Form is subject to the terms of the Mozilla Public
60 | License, v. 2.0. If a copy of the MPL was not distributed with this
61 | file, You can obtain one at http://mozilla.org/MPL/2.0/
62 | 
63 | See [LICENSE](../LICENSE).
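
## Example dry-run

Following on from the Testing section above, a dry run that fetches the indexes and prints the resulting `repositories.yaml` diff without opening a pull request might look like this (illustrative; it assumes the image was built as `fog-update` as shown under "Running with Docker"):

```
$ docker run -it --rm \
    -e GITHUB_TOKEN="$GITHUB_TOKEN" \
    -e DRY_RUN=True \
    fog-update
```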
64 | 
--------------------------------------------------------------------------------
/fog-updater/action.yml:
--------------------------------------------------------------------------------
1 | name: 'fog-updater'
2 | description: "Update repositories.yaml with FOG's metric files"
3 | runs:
4 |   using: 'docker'
5 |   image: 'Dockerfile'
--------------------------------------------------------------------------------
/fog-updater/requirements.txt:
--------------------------------------------------------------------------------
1 | PyGithub==2.5.0
2 | requests==2.32.3
3 | PyYAML==6.0.2
--------------------------------------------------------------------------------
/fog-updater/src/fog_update.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # This Source Code Form is subject to the terms of the Mozilla Public
4 | # License, v. 2.0. If a copy of the MPL was not distributed with this
5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/
6 | 
7 | from github import Github, GithubException, InputGitAuthor, enable_console_debug_logging
8 | import datetime
9 | import difflib
10 | import io
11 | import os
12 | import requests
13 | import sys
14 | import yaml
15 | 
16 | DEFAULT_ORGANIZATION = "mozilla"
17 | DEFAULT_AUTHOR_NAME = "data-updater"
18 | DEFAULT_AUTHOR_EMAIL = "telemetry-alerts@mozilla.com"
19 | USAGE = "usage: fog-update"
20 | HTTP_HEADERS = {
21 |     "user-agent": "probe-scraper/1.0",
22 | }
23 | INDEX_URL = "https://raw.githubusercontent.com/mozilla-firefox/firefox/main/toolkit/components/glean/metrics_index.py"  # noqa
24 | FFX_IOS_INDEX_URL = "https://raw.githubusercontent.com/mozilla-mobile/firefox-ios/main/firefox-ios/Client/Glean/glean_index.yaml"  # noqa
25 | BODY_TEMPLATE = f"""This (automated) patch updates the list from metrics_index.py.
26 | 
27 | For reviewers:
28 | 
29 | * Canonical source for the index: <{INDEX_URL}>
30 | * Please double-check that the changes here are valid and that the referenced files exist.
31 | * If the referenced files do not exist, schema deploys will fail
32 | * Delete this branch after merging or closing the PR.
33 | 
34 | ---
35 | 
36 | The source code of this automation bot lives in <https://github.com/mozilla/probe-scraper/tree/main/fog-updater>.
37 | """  # noqa
38 | 
39 | 
40 | class UnmodifiedException(Exception):
41 |     pass
42 | 
43 | 
44 | def ts():
45 |     return str(datetime.datetime.now())
46 | 
47 | 
48 | def eval_extract(code):
49 |     """
50 |     Eval `code` and return a map of variables and their values.
51 | 
52 |     `code` should be valid Python code.
53 |     Only the builtins `list`, `set`, and `sorted` are provided.
54 | 
55 |     Note: this executes arbitrary Python code.
56 |     Because of the limited builtins list this should be reasonably safe.
57 |     Still only use this with known valid code!
58 |     """
59 | 
60 |     # Allow `list`, `set` and `sorted`, so `list(set(a+b+c))` works.
61 |     globs = {"__builtins__": {"list": list, "set": set, "sorted": sorted}}
62 |     exec(code, globs)
63 |     globs.pop("__builtins__")
64 |     return globs
65 | 
66 | 
67 | def swap_file_list(content, app, files, metrics_or_pings, library=False):
68 |     """
69 |     Replace the list of `metrics_files` or `ping_files` in `content` with `files`
70 |     for the given app or library.
71 |     Returns the changed content.
72 | 
73 |     All other content is left untouched.
74 |     YAML syntax is assumed.
75 |     File entries are correctly indented.
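
    Example (illustrative): given YAML content containing

        - app_name: my_app
          metrics_files:
            - old/metrics.yaml

    calling `swap_file_list(content, "my_app", ["a.yaml", "b.yaml"], "metrics")`
    replaces the entry under `metrics_files` with the two new entries at the
    same indentation. Ping and tag lists work the same way via
    `metrics_or_pings="pings"` or `"tags"`.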
76 | """ 77 | output = io.StringIO() 78 | state = None 79 | if library: 80 | app = f"- library_name: {app}" 81 | else: 82 | app = f"- app_name: {app}" 83 | indent = 0 84 | 85 | lines = content.split("\n") 86 | 87 | # Remove trailing newlines. 88 | while not lines[-1]: 89 | lines.pop() 90 | 91 | for line in lines: 92 | if state is None and line.strip() == app: 93 | state = "app" 94 | elif ( 95 | state == "app" 96 | and metrics_or_pings == "metrics" 97 | and "metrics_files:" in line 98 | ): 99 | state = "files" 100 | elif state == "app" and metrics_or_pings == "pings" and "ping_files:" in line: 101 | state = "files" 102 | elif state == "app" and metrics_or_pings == "tags" and "tag_files:" in line: 103 | state = "files" 104 | elif state == "files": 105 | if line.strip().startswith("-"): 106 | indent = line.find("-") 107 | continue 108 | else: 109 | for file in files: 110 | print(" " * indent, file=output, end="") 111 | print(f"- {file}\n", file=output, end="") 112 | state = None 113 | 114 | print(line, file=output) 115 | 116 | return output.getvalue() 117 | 118 | 119 | def get_latest_metrics_index(): 120 | r = requests.get(INDEX_URL, headers=HTTP_HEADERS) 121 | r.raise_for_status() 122 | return r.text 123 | 124 | 125 | def get_latest_ios_metrics_index(): 126 | r = requests.get(FFX_IOS_INDEX_URL, headers=HTTP_HEADERS) 127 | r.raise_for_status() 128 | return r.text 129 | 130 | 131 | def _rewrite_repositories_yaml(repo, branch, data, debug=False): 132 | contents = repo.get_contents("repositories.yaml", ref=branch) 133 | content = contents.decoded_content.decode("utf-8") 134 | 135 | new_content = content 136 | for item in data: 137 | name, metrics_or_pings, library, files = item 138 | new_content = swap_file_list( 139 | new_content, name, files, metrics_or_pings, library 140 | ) 141 | 142 | if content == new_content: 143 | raise UnmodifiedException( 144 | "Update to repositories.yaml resulted in no changes: maybe the file was already up to date?" 
# noqa 145 | ) 146 | 147 | if debug: 148 | diff = difflib.unified_diff( 149 | content.splitlines(keepends=True), 150 | new_content.splitlines(keepends=True), 151 | fromfile="old/repositories.yaml", 152 | tofile="new/repositories.yaml", 153 | ) 154 | sys.stdout.writelines(diff) 155 | 156 | return new_content 157 | 158 | 159 | def _commit_repositories_yaml(repo, branch, author, new_content): 160 | contents = repo.get_contents("repositories.yaml", ref=branch) 161 | 162 | repo.update_file( 163 | contents.path, 164 | "Update repositories.yaml with new FOG metrics_yamls list", 165 | new_content, 166 | contents.sha, 167 | branch=branch, 168 | author=author, 169 | ) 170 | 171 | return True 172 | 173 | 174 | def main(argv, repo, author, debug=False, dry_run=False): 175 | if len(argv) < 1: 176 | print(USAGE) 177 | sys.exit(1) 178 | 179 | release_branch_name = "main" 180 | short_version = "main" 181 | 182 | metrics_index = get_latest_metrics_index() 183 | data = eval_extract(metrics_index) 184 | gecko_metrics = sorted(data["gecko_metrics"]) 185 | gecko_pings = sorted(data["gecko_pings"]) 186 | firefox_desktop_metrics = sorted(data["firefox_desktop_metrics"]) 187 | firefox_desktop_pings = sorted(data["firefox_desktop_pings"]) 188 | background_update_metrics = sorted(data["background_update_metrics"]) 189 | background_update_pings = sorted(data["background_update_pings"]) 190 | background_tasks_metrics = sorted(data["background_tasks_metrics"]) 191 | background_tasks_pings = sorted(data["background_tasks_pings"]) 192 | 193 | ios_metrics_index = get_latest_ios_metrics_index() 194 | data = yaml.safe_load(ios_metrics_index) 195 | firefox_ios_pings = sorted(data["ping_files"]) 196 | firefox_ios_metrics = sorted(data["metrics_files"]) 197 | firefox_ios_tags = sorted(data["tag_files"]) 198 | 199 | data = [ 200 | # Name, metrics/pings, library?, files 201 | ["gecko", "metrics", True, gecko_metrics], 202 | ["gecko", "pings", True, gecko_pings], 203 | ["firefox_desktop", "metrics", False, firefox_desktop_metrics], 204 | ["firefox_desktop", "pings", False, firefox_desktop_pings], 205 | [ 206 | "firefox_desktop_background_update", 207 | "metrics", 208 | False, 209 | background_update_metrics, 210 | ], 211 | ["firefox_desktop_background_update", "pings", False, background_update_pings], 212 | [ 213 | "firefox_desktop_background_tasks", 214 | "metrics", 215 | False, 216 | background_tasks_metrics, 217 | ], 218 | ["firefox_desktop_background_tasks", "pings", False, background_tasks_pings], 219 | ["firefox_ios", "pings", False, firefox_ios_pings], 220 | ["firefox_ios", "metrics", False, firefox_ios_metrics], 221 | ["firefox_ios", "tags", False, firefox_ios_tags], 222 | ] 223 | 224 | print(f"{ts()} Updating repositories.yaml") 225 | try: 226 | new_content = _rewrite_repositories_yaml( 227 | repo, release_branch_name, data, debug=dry_run or debug 228 | ) 229 | except UnmodifiedException as e: 230 | print(f"{ts()} {e}") 231 | return 232 | except Exception as e: 233 | print(f"{ts()} {e}") 234 | raise 235 | 236 | if dry_run: 237 | print(f"{ts()} Dry-run so not continuing.") 238 | return 239 | 240 | # Create a non unique PR branch name for work on this ac release branch. 241 | pr_branch_name = f"fog-update/update-metrics-index-{short_version}" 242 | 243 | try: 244 | pr_branch = repo.get_branch(pr_branch_name) 245 | if pr_branch: 246 | print(f"{ts()} The PR branch {pr_branch_name} already exists. 
Exiting.") 247 | return 248 | except GithubException: 249 | # TODO Only ignore a 404 here, fail on others 250 | pass 251 | 252 | release_branch = repo.get_branch(release_branch_name) 253 | print(f"{ts()} Last commit on {release_branch_name} is {release_branch.commit.sha}") 254 | 255 | print(f"{ts()} Creating branch {pr_branch_name} on {release_branch.commit.sha}") 256 | repo.create_git_ref( 257 | ref=f"refs/heads/{pr_branch_name}", sha=release_branch.commit.sha 258 | ) 259 | print(f"{ts()} Created branch {pr_branch_name} on {release_branch.commit.sha}") 260 | 261 | _commit_repositories_yaml(repo, pr_branch_name, author, new_content) 262 | 263 | print(f"{ts()} Creating pull request") 264 | pr = repo.create_pull( 265 | title=f"Update to latest metrics_index list on {release_branch_name}", 266 | body=BODY_TEMPLATE, 267 | head=pr_branch_name, 268 | base=release_branch_name, 269 | ) 270 | print(f"{ts()} Pull request at {pr.html_url}") 271 | 272 | 273 | if __name__ == "__main__": 274 | debug = os.getenv("DEBUG") is not None 275 | if debug: 276 | enable_console_debug_logging() 277 | 278 | github_access_token = os.getenv("GITHUB_TOKEN") 279 | if not github_access_token: 280 | print("No GITHUB_TOKEN set. Exiting.") 281 | sys.exit(1) 282 | 283 | github = Github(github_access_token) 284 | if github.get_user() is None: 285 | print("Could not get authenticated user. Exiting.") 286 | sys.exit(1) 287 | 288 | dry_run = os.getenv("DRY_RUN") == "True" 289 | 290 | organization = os.getenv("GITHUB_REPOSITORY_OWNER") or DEFAULT_ORGANIZATION 291 | 292 | repo = github.get_repo(f"{organization}/probe-scraper") 293 | 294 | author_name = os.getenv("AUTHOR_NAME") or DEFAULT_AUTHOR_NAME 295 | author_email = os.getenv("AUTHOR_EMAIL") or DEFAULT_AUTHOR_EMAIL 296 | author = InputGitAuthor(author_name, author_email) 297 | 298 | print( 299 | f"{ts()} This is fog-update working on https://github.com/{organization} as {author_email} / {author_name}" # noqa 300 | ) 301 | 302 | main(sys.argv, repo, author, debug, dry_run) 303 | -------------------------------------------------------------------------------- /fog-updater/src/test_util.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/ 4 | 5 | 6 | from fog_update import eval_extract, swap_file_list 7 | 8 | 9 | REPOSITORIES_YAML = """ 10 | --- 11 | version: "2" 12 | libraries: 13 | - library_name: gecko 14 | description: The browser engine developed by Mozilla 15 | notification_emails: 16 | - chutten@mozilla.com 17 | url: https://github.com/mozilla-firefox/firefox 18 | metrics_files: 19 | - LIB_METRICS_FILES 20 | ping_files: 21 | - LIB_PING_FILES 22 | 23 | applications: 24 | - app_name: firefox_desktop 25 | metrics_files: 26 | - METRICS_FILES 27 | ping_files: 28 | - PING_FILES 29 | - app_name: firefox_desktop_background_update 30 | metrics_files: 31 | - OTHER_METRICS_FILES 32 | ping_files: 33 | - OTHER_PING_FILES 34 | """ 35 | 36 | METRICS_INDEX = """ 37 | # -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- 38 | # vim: set filetype=python: 39 | 40 | first_yamls = ["A", "B"] 41 | second_yamls = ["B", "C"] 42 | metrics_yamls = sorted(list(set(first_yamls + second_yamls))) 43 | 44 | pings_yamls = [ 45 | "D", 46 | "E", 47 | "F" 48 | ] 49 | """ 50 | 51 | 52 | def test_eval_metrics_index(): 53 | data = eval_extract(METRICS_INDEX) 54 | assert data["first_yamls"] == ["A", "B"] 55 | assert data["second_yamls"] == ["B", "C"] 56 | assert data["metrics_yamls"] == ["A", "B", "C"] 57 | assert data["pings_yamls"] == ["D", "E", "F"] 58 | 59 | 60 | def test_swap_repositories_yaml(): 61 | data = eval_extract(METRICS_INDEX) 62 | metrics_files = data["metrics_yamls"] 63 | output = swap_file_list( 64 | REPOSITORIES_YAML, "firefox_desktop", metrics_files, "metrics" 65 | ) 66 | 67 | # New files added. 68 | assert "- METRICS_FILES" not in output 69 | assert "- A" in output 70 | assert "- B" in output 71 | assert "- C" in output 72 | # ping files untouched. 73 | assert "- PING_FILES" in output 74 | 75 | # Other app untouched 76 | assert "- OTHER_METRICS_FILES" in output 77 | assert "- OTHER_PING_FILES" in output 78 | 79 | 80 | def test_swap_ping_files(): 81 | data = eval_extract(METRICS_INDEX) 82 | metrics_files = data["pings_yamls"] 83 | output = swap_file_list( 84 | REPOSITORIES_YAML, "firefox_desktop", metrics_files, "pings" 85 | ) 86 | 87 | # metrics files untouched. 88 | assert "- METRICS_FILES" in output 89 | # New files added. 90 | assert "- PING_FILES" not in output 91 | assert "- D" in output 92 | assert "- E" in output 93 | assert "- F" in output 94 | 95 | # Other app untouched 96 | assert "- OTHER_METRICS_FILES" in output 97 | assert "- OTHER_PING_FILES" in output 98 | 99 | 100 | def test_swap_repositories_yaml_unchanged(): 101 | metrics_files = ["METRICS_FILES"] 102 | output = swap_file_list( 103 | REPOSITORIES_YAML, "firefox_desktop", metrics_files, "metrics" 104 | ) 105 | 106 | # New files added. 107 | assert "- METRICS_FILES" in output 108 | assert "- A" not in output 109 | # ping files untouched. 110 | assert "- PING_FILES" in output 111 | 112 | # Other app untouched 113 | assert "- OTHER_METRICS_FILES" in output 114 | assert "- OTHER_PING_FILES" in output 115 | 116 | 117 | def test_libraries(): 118 | data = eval_extract(METRICS_INDEX) 119 | metrics_files = data["metrics_yamls"] 120 | output = swap_file_list( 121 | REPOSITORIES_YAML, "gecko", metrics_files, "metrics", library=True 122 | ) 123 | 124 | # New files added. 125 | assert "- LIB_METRICS_FILES" not in output 126 | assert "- A" in output 127 | assert "- B" in output 128 | assert "- C" in output 129 | # ping files untouched. 
130 | assert "- LIB_PING_FILES" in output 131 | 132 | # Other app untouched 133 | assert "- METRICS_FILES" in output 134 | assert "- PING_FILES" in output 135 | assert "- OTHER_METRICS_FILES" in output 136 | assert "- OTHER_PING_FILES" in output 137 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """Google Cloud Function entry points. 2 | 3 | These must be in main.py in same directory as requirements.txt and cannot be nested 4 | inside another package. 5 | """ 6 | 7 | from probe_scraper.glean_push import main as glean_push 8 | 9 | __all__ = ["glean_push"] 10 | -------------------------------------------------------------------------------- /notebooks/load_and_run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This comes from https://github.com/harterrt/cookiecutter-python-etl/\n", 12 | "# Thanks Harter!" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import boto3\n", 24 | "import botocore\n", 25 | "import os\n", 26 | "\n", 27 | "from io import BytesIO\n", 28 | "from gzip import GzipFile" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "repo_dir = \"probe-scraper\"\n", 40 | "output_dir = \"/home/hadoop/analyses/probe_data\"\n", 41 | "cache_dir = \"/home/hadoop/analyses/probe_cache\"\n", 42 | "repo_https_url = \"https://github.com/mozilla/probe-scraper\"\n", 43 | "\n", 44 | "S3_PUBLIC_BUCKET = \"telemetry-public-analysis-2\"\n", 45 | "S3_DATA_PATH = \"probe-scraper/data-rest/\"" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "!rm -rf $repo_dir" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "!rm -rf $output_dir" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "!rm -rf $cache_dir" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "!git config --global user.email \"gfritzsche@mozilla.com\" && \\\n", 90 | "git config --global user.name \"Georg Fritzsche\"" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "!git clone $repo_https_url $repo_dir" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "!cd $repo_dir && git pull origin master && python setup.py bdist_egg" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "!mkdir $output_dir && mkdir $cache_dir" 124 | ] 
125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "!cd $repo_dir && pip install -r requirements.txt && python probe_scraper/runner.py --outdir $output_dir --tempdir $cache_dir" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "source": [ 143 | "## Upload the output to S3." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "# Get access to the S3 connect API.\n", 155 | "client = boto3.client('s3', 'us-west-2')" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "def gzip_compress(source_file):\n", 167 | " \"\"\" Apply GZIP compression to the content of the provided file.\n", 168 | "\n", 169 | " :param source_file: the absolute path of the file to compress.\n", 170 | " :return: The gzip compressed content of the input file.\n", 171 | " \"\"\"\n", 172 | " with open(source_file) as fi:\n", 173 | " text_body = fi.read().decode(\"utf-8\")\n", 174 | "\n", 175 | " gz_body = BytesIO()\n", 176 | " gz = GzipFile(None, 'wb', 9, gz_body)\n", 177 | " gz.write(text_body.encode('utf-8'))\n", 178 | " gz.close()\n", 179 | " \n", 180 | " return gz_body.getvalue()" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "for path, subdirs, files in os.walk(output_dir):\n", 192 | " relative_path = os.path.relpath(path, output_dir)\n", 193 | " # GZIP-compress the files, then copy them to S3. 
Allow caching for 8 hours.\n", 194 | " for file_name in files:\n", 195 | " source_path = os.path.join(path, file_name)\n", 196 | " key_path = os.path.join(S3_DATA_PATH, relative_path, file_name)\n", 197 | " print \"uploading \" + file_name + \" to s3: \" + key_path\n", 198 | " client.put_object(ACL='public-read', Bucket=S3_PUBLIC_BUCKET,\n", 199 | " Key=key_path, Body=gzip_compress(source_path),\n", 200 | " ContentEncoding='gzip', CacheControl='max-age=28800',\n", 201 | " ContentType='application/json')" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [conda root]", 218 | "language": "python", 219 | "name": "conda-root-py" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 2 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython2", 231 | "version": "2.7.12" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 1 236 | } 237 | -------------------------------------------------------------------------------- /probe_scraper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/probe_scraper/__init__.py -------------------------------------------------------------------------------- /probe_scraper/check_repositories.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from collections import defaultdict 4 | from typing import Set, Tuple 5 | 6 | import git 7 | import requests as reqs 8 | 9 | from .parsers.repositories import RepositoriesParser 10 | 11 | GIT = git.Git() 12 | GIT_BRANCH_PATTERN = re.compile("ref: refs/heads/([^\t]+)\tHEAD") 13 | GITHUB_RAW_URL = "https://raw.githubusercontent.com" 14 | REPOSITORIES = os.path.join( 15 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "repositories.yaml" 16 | ) 17 | EXPECTED_MISSING_FILES: Set[Tuple[str, str]] = { 18 | ("support-migration", "components/support/migration/metrics.yaml"), 19 | ("viu-politica", "source/telemetry/metrics.yaml"), 20 | } 21 | validation_errors = [] 22 | repos = RepositoriesParser().parse(REPOSITORIES) 23 | 24 | app_id_channels = defaultdict(lambda: defaultdict(lambda: 0)) 25 | 26 | repo_by_library_name = {} 27 | for repo in repos: 28 | for library_name in repo.library_names or []: 29 | repo_by_library_name[library_name] = repo.name 30 | 31 | for repo in repos: 32 | metrics_files = repo.get_metrics_file_paths() 33 | temp_errors = [] 34 | 35 | if repo.app_id and repo.channel and not repo.deprecated: 36 | app_id_channels[repo.app_id][repo.channel] += 1 37 | 38 | for metric_file in metrics_files: 39 | if repo.deprecated: 40 | continue # ignore missing files for deprecated apps 41 | 42 | if (repo.name, metric_file) in EXPECTED_MISSING_FILES: 43 | continue # ignore missing files 44 | 45 | branch = repo.branch 46 | if branch is None: 47 | match = GIT_BRANCH_PATTERN.match( 48 | GIT.ls_remote("--symref", repo.url, "HEAD") 49 | ) 50 | if match is None: 51 | temp_errors += ["Failed to get default branch from git for " + repo.url] 52 | continue 53 | branch = match.groups()[0] 54 | 55 | temp_url = ( 56 | 
repo.url.replace("https://github.com", GITHUB_RAW_URL) 57 | + "/" 58 | + branch 59 | + "/" 60 | + metric_file 61 | ) 62 | response = reqs.get(temp_url) 63 | if response.status_code != 200: 64 | temp_errors += ["Metrics file was not found at " + temp_url] 65 | 66 | for library_name in repo.dependencies: 67 | if library_name not in repo_by_library_name: 68 | temp_errors.append(f"Dependency not found: {library_name}") 69 | if temp_errors and not repo.prototype: 70 | validation_errors.append({"repo": repo.name, "errors": temp_errors}) 71 | 72 | # Ensure non-deprecated channels are uniquely named 73 | duplication_errors = [] 74 | for app_id, channels in app_id_channels.items(): 75 | temp_errors = [] 76 | for channel_name, num in channels.items(): 77 | if num > 1: 78 | duplication_errors.append( 79 | f"Non-deprecated channel names must be unique, found {channel_name} {num} " 80 | f"times for {app_id}" 81 | ) 82 | 83 | if validation_errors: 84 | print("\nSummary of validation errors:\n") 85 | print(f"{len(validation_errors)} repositories had problems\n") 86 | for error in validation_errors: 87 | print(f"\nErrors found in {error['repo']}:\n") 88 | for line_errors in error["errors"]: 89 | print(line_errors) 90 | 91 | if duplication_errors: 92 | print("\nDuplicate channel names found:\n") 93 | for duplication_error in duplication_errors: 94 | print(duplication_error) 95 | 96 | if validation_errors or duplication_errors: 97 | exit(1) 98 | -------------------------------------------------------------------------------- /probe_scraper/emailer.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from email.mime.application import MIMEApplication 6 | from email.mime.multipart import MIMEMultipart 7 | from email.mime.text import MIMEText 8 | from pathlib import Path 9 | 10 | import boto3 11 | import yaml 12 | 13 | EMAIL_FILE = Path("emails.txt") 14 | 15 | 16 | def send_ses( 17 | fromaddr, subject, body, recipients, filename="", dryrun=True, email_file=None 18 | ): 19 | """Send an email via the Amazon SES service. Can specify a single or list of 20 | recipients. 21 | 22 | Saves emails to `emails.txt`. 
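When `dryrun` is true (the default), nothing is sent through SES; the message is appended to that file and echoed to stdout.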
23 | 24 | Examples: 25 | ``` 26 | send_ses('me@example.com', 'greetings', "Hi!", 'you@example.com') 27 | ``` 28 | 29 | ``` 30 | send_ses('me@example.com', 'greetings', "Hi!", ['a@example.com', 'b@example.com']) 31 | ``` 32 | 33 | Raises a RuntimeError if the message did not send correctly.""" 34 | 35 | if isinstance(recipients, list): 36 | recipients = ",".join(recipients) 37 | 38 | email_data = [ 39 | {"from": fromaddr, "to": recipients, "subject": subject, "body": body} 40 | ] 41 | 42 | if email_file is None: 43 | email_file = EMAIL_FILE 44 | 45 | with open(email_file, "a") as f: 46 | f.write(yaml.dump(email_data, default_flow_style=False)) 47 | 48 | if dryrun: 49 | email_txt = "\n".join( 50 | [ 51 | "New Email", 52 | " From: " + fromaddr, 53 | " To: " + recipients, 54 | " Subject: " + subject, 55 | " Body: " + body, 56 | ] 57 | ) 58 | print(email_txt) 59 | return 60 | 61 | msg = MIMEMultipart() 62 | msg["Subject"] = subject 63 | msg["From"] = fromaddr 64 | msg["To"] = recipients 65 | msg.attach(MIMEText(body)) 66 | 67 | if filename: 68 | attachment = open(filename, "rb").read() 69 | part = MIMEApplication(attachment) 70 | part.add_header("Content-Disposition", "attachment", filename=filename) 71 | msg.attach(part) 72 | 73 | ses = boto3.client("ses", region_name="us-west-2") 74 | result = ses.send_raw_email(RawMessage={"Data": msg.as_string()}) 75 | 76 | if "ErrorResponse" in result: 77 | raise RuntimeError("Error sending email: " + str(result)) 78 | -------------------------------------------------------------------------------- /probe_scraper/exc.py: -------------------------------------------------------------------------------- 1 | class ProbeScraperError(Exception): 2 | """Exception type for returning errors in push mode.""" 3 | 4 | def __init__(self, message, status_code): 5 | self.message = message 6 | self.status_code = status_code 7 | 8 | 9 | class ProbeScraperInvalidRequest(ProbeScraperError): 10 | """Exception type for returning HTTP 4XX in push mode.""" 11 | 12 | def __init__(self, message, status_code=400): 13 | super().__init__(message, status_code) 14 | 15 | 16 | class ProbeScraperServerError(ProbeScraperError): 17 | """Exception type for returning HTTP 5XX in push mode.""" 18 | 19 | def __init__(self, message, status_code=500): 20 | super().__init__(message, status_code) 21 | -------------------------------------------------------------------------------- /probe_scraper/fog_checks.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | """ 6 | This file contains various checks for Firefox on Glean (FOG). 7 | 8 | FOG is Glean, yes, but is sufficiently different that it benefits from doing 9 | its own expiry checks, sending its own emails, and filing its own bugs. 10 | """ 11 | 12 | import re 13 | from collections import defaultdict 14 | from typing import Any, Dict, List, Optional, Set, TypedDict 15 | 16 | from probe_scraper import probe_expiry_alert 17 | 18 | from .glean_checks import get_current_metrics_by_repo 19 | from .parsers.repositories import Repository 20 | 21 | EXPIRED_METRICS_EMAIL_TEMPLATE = """ 22 | Each metric in the following list will soon expire at the end of Firefox {version}.
23 | For your convenience, we've filed bugs to track the work of removing or renewing them: 24 | 25 | {expiring_bugs_list} 26 | 27 | What to do about this: 28 | 29 | 1. If the metric is no longer needed, remove it from its `metrics.yaml` file. 30 | 2. If the metric is still required, extend its expiration. 31 | 32 | If you have any problems, please ask for help on the #glean Matrix channel[1]. We'll give you a hand. 33 | 34 | What happens if you don't fix this: 35 | 36 | The expiring metric will expire, causing a test failure which 37 | * makes sheriffs unhappy, 38 | * prevents developers from landing code, and 39 | * generally makes for a bad time. 40 | 41 | You will continue to get this e-mail as a reminder to clean up. 42 | 43 | Your Friendly Neighbourhood Glean Team 44 | 45 | [1] https://chat.mozilla.org/#/room/#glean:mozilla.org 46 | 47 | This is an automated message sent from probe-scraper. See https://github.com/mozilla/probe-scraper for details. 48 | """ # noqa 49 | 50 | 51 | ### 52 | # Types for Annotations: 53 | ### 54 | class Email(TypedDict): 55 | subject: str 56 | message: str 57 | 58 | 59 | class EmailInfo(TypedDict): 60 | addresses: List[str] 61 | emails: List[Email] 62 | 63 | 64 | # The full list of all repos that are FOG style. Must: 65 | # * Expire based on Firefox Desktop Nightly Version, and 66 | # * Use Bugzilla for its bug urls 67 | FOG_REPOS: Set[str] = {"firefox-desktop", "gecko"} 68 | 69 | 70 | # The BMO whiteboard tag to use for auto-filed bugs 71 | BUG_WHITEBOARD_TAG = "[metric-expiry-alert]" 72 | # The BMO Title, templated by version and metric family 73 | BUG_SUMMARY_TEMPLATE = ( 74 | "Remove or update metrics expiring at the end of Firefox {version}: {probe}" 75 | ) 76 | # BE ALERT: We regex on this template to find existing bugs. 77 | # SEE probe_expiry_alert.find_existing_bugs FOR DETAILS. 78 | # IF YOU MODIFY THIS WITHOUT CARE WE WILL FILE DUPLICATE BUGS. 79 | # Please be kind to your Sheriffs and only modify with care. 80 | BUG_DESCRIPTION_TEMPLATE = """ 81 | The following metrics will expire at the end of Firefox Nightly release: [version {version}][1]. 82 | 83 | ``` 84 | {probes} 85 | ``` 86 | 87 | {notes} 88 | 89 | What to do about this: 90 | 1. If one, some, or all of the metrics are no longer needed, please remove them from their `metrics.yaml` definition file. 91 | 2. If one, some, or all of the metrics are still required, please submit a patch to extend their expiry. 92 | 93 | If you have any problems, please ask for help on the [#glean Matrix room](https://chat.mozilla.org/#/room/#glean:mozilla.org) or the #data-help Slack channel. 94 | We'll give you a hand. 95 | 96 | Your Friendly Neighbourhood Glean Team 97 | 98 | [1]: https://wiki.mozilla.org/Release_Management/Calendar 99 | 100 | --- 101 | This bug was auto-filed by [probe-scraper](https://github.com/mozilla/probe-scraper). 102 | """ # noqa 103 | 104 | 105 | BUG_NUMBER_PATTERN = re.compile(r"\d+") 106 | 107 | 108 | def get_expiring_metrics( 109 | metrics: Dict[str, Dict], latest_nightly_version: str 110 | ) -> Dict[str, Dict]: 111 | """ 112 | Filter the provided dict of metric name to metric info to just the expiring ones. 113 | """ 114 | 115 | # We start warning one version ahead. 116 | target_version = int(latest_nightly_version) + 1 117 | 118 | expiring_metrics = {} 119 | for metric_name, metric in metrics.items(): 120 | if metric["expires"] == "never": 121 | continue 122 | 123 | if metric["expires"] == "expired": 124 | # Also include manually-expired ones. 
125 | # This is not only technically correct, but makes testing easier. 126 | expiring_metrics[metric_name] = metric 127 | continue 128 | 129 | try: 130 | expiry_version = int(metric["expires"]) 131 | except ValueError: 132 | # Expires cannot be parsed as a version. Treat as unexpired. 133 | # TODO: Should we send emails for unparseable expiry versions? 134 | continue 135 | 136 | if expiry_version == target_version: 137 | expiring_metrics[metric_name] = metric 138 | 139 | return expiring_metrics 140 | 141 | 142 | def bug_number_from_url(url: str) -> Optional[int]: 143 | """ 144 | Given a bug url, get its bug number. 145 | If we can't figure out a reasonable bug number, return None. 146 | """ 147 | if "bugz" not in url: 148 | # Not a bugzilla url. We don't understand you. 149 | print(f"Can't figure out bug number for non-bugzilla url: {url}") 150 | return None 151 | 152 | bug = BUG_NUMBER_PATTERN.search(url) 153 | if bug is not None: 154 | try: 155 | bug = int(bug[0]) 156 | except Exception: 157 | print(f"Can't figure out bug number for url: {url}") 158 | return None 159 | return bug 160 | 161 | 162 | def file_bugs( 163 | expiring_metrics: Dict[str, Dict], 164 | latest_nightly_version: str, 165 | bugzilla_api_key: str, 166 | dry_run: bool = True, 167 | ) -> Dict[str, List[str]]: 168 | """ 169 | Find existing and file new Bugzilla bugs for expiring metrics. 170 | Needs a network connection. 171 | If `dry_run`, doesn't file any new bugs, returning a fake bug url for all expiring metrics. 172 | """ 173 | 174 | next_version = str(int(latest_nightly_version) + 1) 175 | 176 | # We try our best to reuse pieces of probe_expiry_alert. 177 | # Swizzle and filter expiring_metrics into a list of ProbeDetails structs. 178 | expiring_probes: List[probe_expiry_alert.ProbeDetails] = [] 179 | for metric_name, metric in expiring_metrics.items(): 180 | bug_numbers: List[Optional[int]] = [ 181 | bug_number_from_url(url) for url in metric["bugs"] 182 | ] 183 | biggest_bug_number: Optional[int] = max( 184 | [bug for bug in bug_numbers if bug is not None], default=None 185 | ) 186 | if biggest_bug_number is not None: 187 | product, component = probe_expiry_alert.get_bug_component( 188 | biggest_bug_number, bugzilla_api_key 189 | ) 190 | else: 191 | product, component = None, None 192 | if product is None and component is None: 193 | product = probe_expiry_alert.BUG_DEFAULT_PRODUCT 194 | component = probe_expiry_alert.BUG_DEFAULT_COMPONENT 195 | 196 | expiring_probes.append( 197 | probe_expiry_alert.ProbeDetails( 198 | metric_name, 199 | product, 200 | component, 201 | metric.get("notification_emails", []), 202 | biggest_bug_number, 203 | ) 204 | ) 205 | 206 | # Debug print time 207 | print(f"Found {len(expiring_probes)} 'probes' expiring in nightly {next_version}:") 208 | print([probe.name for probe in expiring_probes]) 209 | 210 | metrics_to_bug_numbers = probe_expiry_alert.file_bugs( 211 | expiring_probes, 212 | str(latest_nightly_version), 213 | bugzilla_api_key, 214 | dry_run, 215 | BUG_WHITEBOARD_TAG, 216 | BUG_SUMMARY_TEMPLATE, 217 | BUG_DESCRIPTION_TEMPLATE, 218 | ) 219 | 220 | # Swizzle out to a metric_name -> List[bug urls] dict 221 | bug_urls_to_metrics = defaultdict(list) 222 | for metric_name, bug_number in metrics_to_bug_numbers.items(): 223 | bug_urls_to_metrics[ 224 | probe_expiry_alert.BUGZILLA_BUG_LINK_TEMPLATE.format(bug_id=bug_number) 225 | ].append(metric_name) 226 | 227 | if dry_run: 228 | return {"https://example.com/fake_bug_url/": expiring_metrics.keys()} 229 | 230 | return 
bug_urls_to_metrics 231 | 232 | 233 | def file_bugs_and_get_emails_for_expiring_metrics( 234 | repositories: List[Repository], 235 | metrics_by_repo: Dict[str, Dict[str, Dict[str, Any]]], 236 | bugzilla_api_key: Optional[str], 237 | dry_run: bool = True, 238 | ) -> Optional[Dict[str, EmailInfo]]: 239 | """ 240 | If the provided repositories and metrics contain FOG-using repos: 241 | * Determine which metrics are expiring in the next version. 242 | * File bugs in Bugzilla for them, in the product and component of the most recent bug. 243 | At most one bug per metric category. (Doesn't happen if you don't provide an API key.) 244 | * Return a list of emails to send. At most one per FOG repo. 245 | """ 246 | 247 | if len(FOG_REPOS & metrics_by_repo.keys()) == 0: 248 | print("No FOG-using repositories. Nothing to do.") 249 | return None 250 | 251 | # Glean repositories have a default list of notification emails we should include as well. 252 | repo_addresses = { 253 | repo.name: repo.notification_emails 254 | for repo in repositories 255 | if repo.name in FOG_REPOS 256 | } 257 | 258 | current_metrics_by_repo = get_current_metrics_by_repo(metrics_by_repo) 259 | 260 | emails = {} 261 | for fog_repo in FOG_REPOS: 262 | if fog_repo not in metrics_by_repo: 263 | continue 264 | current_metrics: Dict[str, Dict] = current_metrics_by_repo[fog_repo] 265 | latest_nightly_version: str = probe_expiry_alert.get_latest_nightly_version() 266 | expiring_metrics: Dict[str, Dict] = get_expiring_metrics( 267 | current_metrics, latest_nightly_version 268 | ) 269 | 270 | print(f"Found {len(expiring_metrics)} expiring metrics in {fog_repo}.") 271 | if len(expiring_metrics) == 0: 272 | continue 273 | 274 | metrics_addresses = set(repo_addresses[fog_repo]) 275 | for metric in expiring_metrics.values(): 276 | metrics_addresses.update(metric["notification_emails"]) 277 | addresses = list(metrics_addresses) 278 | 279 | filed_bugs: Dict[str, List[str]] = file_bugs( 280 | expiring_metrics, latest_nightly_version, bugzilla_api_key, dry_run 281 | ) 282 | 283 | expiring_bugs_list = [] 284 | for bug_url, bug_metrics in filed_bugs.items(): 285 | # Sort the metric names for easier reading 286 | bug_metrics = list(bug_metrics) 287 | bug_metrics.sort() 288 | 289 | expiring_metrics_list_str = "\n".join(bug_metrics) 290 | expiring_bugs_list.append(f"{bug_url}:\n{expiring_metrics_list_str}") 291 | 292 | # Nothing expiring? No emails needed. 293 | if len(expiring_bugs_list) == 0: 294 | continue 295 | 296 | emails[f"expired_metrics_{fog_repo}"] = EmailInfo( 297 | emails=[ 298 | { 299 | "subject": f"Expired metrics in {fog_repo}", 300 | "message": EXPIRED_METRICS_EMAIL_TEMPLATE.format( 301 | expiring_bugs_list="\n".join(expiring_bugs_list), 302 | version=int(latest_nightly_version), 303 | ), 304 | } 305 | ], 306 | addresses=addresses, 307 | ) 308 | 309 | return emails 310 | -------------------------------------------------------------------------------- /probe_scraper/glean_push.py: -------------------------------------------------------------------------------- 1 | """Google Cloud Function for scraping glean probes from a single commit.""" 2 | 3 | import argparse 4 | import json 5 | import os 6 | import tempfile 7 | from pathlib import Path 8 | from unittest.mock import Mock 9 | 10 | from flask import Request, Response 11 | 12 | from . 
import runner 13 | from .exc import ProbeScraperError 14 | 15 | 16 | def main(request: Request) -> Response: 17 | """Scrape probes from a single glean commit.""" 18 | output_bucket = os.environ.get("OUTPUT_BUCKET", None) 19 | if output_bucket is None: 20 | return Response("Cloud function has no configured output bucket\n", 500) 21 | 22 | args = request.get_json(force=True) 23 | if not isinstance(args, dict): 24 | return Response(f"request body must be a JSON object but got: {args}\n", 400) 25 | try: 26 | url = args["url"] 27 | commit = args["commit"] 28 | branch = args["branch"] 29 | except KeyError as e: 30 | return Response(f"request JSON missing key: {e}\n", 400) 31 | 32 | if not isinstance(url, str): 33 | return Response("Error: url must be a string\n", 400) 34 | if not isinstance(commit, str): 35 | return Response("Error: commit must be a string\n", 400) 36 | if not isinstance(branch, str): 37 | return Response("Error: branch must be a string\n", 400) 38 | 39 | with tempfile.TemporaryDirectory() as tmpdirname: 40 | tmp = Path(tmpdirname) 41 | out_dir = tmp / "output" 42 | cache_dir = tmp / "cache" 43 | email_file = tmp / "emails.txt" 44 | out_dir.mkdir() 45 | cache_dir.mkdir() 46 | try: 47 | updated_paths = runner.main( 48 | cache_dir=cache_dir, 49 | out_dir=out_dir, 50 | firefox_version=None, 51 | min_firefox_version=None, 52 | process_moz_central_probes=False, 53 | process_glean_metrics=True, 54 | repositories_file=Path("repositories.yaml"), 55 | dry_run=True, 56 | glean_repos=None, 57 | firefox_channel=None, 58 | output_bucket=output_bucket, 59 | cache_bucket=None, 60 | env="prod", 61 | bugzilla_api_key=None, 62 | glean_urls=[url], 63 | glean_commit=commit, 64 | glean_commit_branch=branch, 65 | update=True, 66 | email_file=email_file, 67 | ) 68 | except ProbeScraperError as e: 69 | return Response(f"Error: {e.message}\n", e.status_code) 70 | if updated_paths: 71 | updates = ", ".join(str(p.relative_to(out_dir)) for p in updated_paths) 72 | message = f"update published for {updates}\n" 73 | else: 74 | message = "update is valid, but not published\n" 75 | try: 76 | emails = email_file.read_text() 77 | except FileNotFoundError: 78 | pass # no emails means no warnings or expiring metrics, which is good 79 | else: 80 | message += f"additional messages: {emails}\n" 81 | return Response(message, 200) 82 | 83 | 84 | if __name__ == "__main__": 85 | _parser = argparse.ArgumentParser() 86 | _parser.add_argument( 87 | "data", 88 | help="JSON format data describing the glean commit or branch to push", 89 | type=str, 90 | ) 91 | _args = _parser.parse_args() 92 | _data = json.loads(_args.data) 93 | _request = Mock(get_json=Mock(return_value=_data), args=_data) 94 | _response = main(_request) 95 | print(f"HTTP {_response.status_code}: {_response.data.decode()}") 96 | -------------------------------------------------------------------------------- /probe_scraper/model_validation.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from jsonschema import Draft7Validator, RefResolver, validators 3 | 4 | API_FILENAME = "probeinfo_api.yaml" 5 | with open(API_FILENAME, "r") as f: 6 | API = yaml.load(f, Loader=yaml.SafeLoader) 7 | SCHEMAS = API["components"]["schemas"] 8 | RESOLVER = RefResolver("", API) 9 | 10 | 11 | def extend_with_default(validator_class): 12 | """ 13 | Apply default values from the schema when not present. 
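For example (illustrative schema): with `{"deprecated": {"type": "boolean", "default": False}}` as a property, validating `{}` with the extended validator leaves `{"deprecated": False}` in the instance.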
14 | 15 | See https://python-jsonschema.readthedocs.io/en/stable/faq/ 16 | """ 17 | validate_properties = validator_class.VALIDATORS["properties"] 18 | 19 | def set_defaults(validator, properties, instance, schema): 20 | for property, subschema in properties.items(): 21 | if "default" in subschema: 22 | instance.setdefault(property, subschema["default"]) 23 | 24 | for error in validate_properties( 25 | validator, 26 | properties, 27 | instance, 28 | schema, 29 | ): 30 | yield error 31 | 32 | return validators.extend( 33 | validator_class, 34 | {"properties": set_defaults}, 35 | ) 36 | 37 | 38 | Validator = extend_with_default(Draft7Validator) 39 | 40 | 41 | def validate_as(instance, model_name): 42 | schema = SCHEMAS[model_name] 43 | Draft7Validator(schema, resolver=RESOLVER).validate(instance) 44 | 45 | 46 | def apply_defaults_and_validate(instance, model_name): 47 | schema = SCHEMAS[model_name] 48 | Validator(schema, resolver=RESOLVER).validate(instance) 49 | # Send through validation again to be sure any injected default values 50 | # still validate with the schema. 51 | validate_as(instance, model_name) 52 | -------------------------------------------------------------------------------- /probe_scraper/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/probe_scraper/parsers/__init__.py -------------------------------------------------------------------------------- /probe_scraper/parsers/events.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from .third_party import parse_events 6 | from .utils import get_major_version, set_in_nested_dict 7 | 8 | 9 | def extract_events_data(e): 10 | props = { 11 | # source_field: target_field 12 | # TODO: extract description. 13 | "description": "description", 14 | "expiry_version": "expiry_version", 15 | "expiry_day": "expiry_day", 16 | "cpp_guard": "cpp_guard", 17 | "bug_numbers": "bug_numbers", 18 | "methods": "details/methods", 19 | "objects": "details/objects", 20 | "record_in_processes": "details/record_in_processes", 21 | # TODO: extract key descriptions too. 22 | "extra_keys": "details/extra_keys", 23 | } 24 | 25 | defaults = { 26 | "expiry_version": "never", 27 | "expiry_day": "never", 28 | "name": e.methods[0], 29 | "description": e.description, 30 | "cpp_guard": None, 31 | "bug_numbers": [], 32 | } 33 | 34 | data = {"details": {}} 35 | 36 | for source_field, target_field in props.items(): 37 | value = getattr(e, source_field, e._definition.get(source_field, None)) 38 | if value is None and source_field in defaults: 39 | value = defaults[source_field] 40 | set_in_nested_dict(data, target_field, value) 41 | 42 | # We only care about opt-out or opt-in really. 43 | optout = getattr(e, "dataset", "").endswith("_OPTOUT") 44 | data["optout"] = optout 45 | 46 | # Normalize some field values. 47 | data["expiry_version"] = get_major_version(data["expiry_version"]) 48 | if data["expiry_version"] == "default": 49 | data["expiry_version"] = "never" 50 | 51 | return data 52 | 53 | 54 | class EventsParser: 55 | def parse(self, filenames, version=None, channel=None): 56 | # Events.yaml had a format change in 53, see bug 1329620.
57 | # We don't have important event usage yet, so lets skip 58 | # backwards compatibility for now. 59 | if (version and channel) and ( 60 | ( 61 | (channel != "nightly" and version < 53) 62 | or (channel == "nightly" and version < 54) 63 | ) 64 | ): 65 | return {} 66 | 67 | if len(filenames) > 1: 68 | raise Exception("We don't support loading from more than one file.") 69 | 70 | events = parse_events.load_events(filenames[0], strict_type_checks=False) 71 | 72 | # Get the probe information in a standard format. 73 | out = {} 74 | for e in events: 75 | full_name = e.category + "." + e.methods[0] 76 | if getattr(e, "name", None): 77 | full_name += "#" + e.name 78 | out[full_name] = extract_events_data(e) 79 | 80 | return out 81 | -------------------------------------------------------------------------------- /probe_scraper/parsers/histograms.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from .third_party import histogram_tools 6 | from .utils import get_major_version, set_in_nested_dict 7 | 8 | 9 | def extract_histogram_data(histogram, version): 10 | props = { 11 | # source_field: target_field 12 | "cpp_guard": "cpp_guard", 13 | "description": "description", 14 | "expiration": "expiry_version", 15 | "bug_numbers": "bug_numbers", 16 | "alert_emails": "notification_emails", 17 | "n_buckets": "details/n_buckets", 18 | "low": "details/low", 19 | "high": "details/high", 20 | "keyed": "details/keyed", 21 | "kind": "details/kind", 22 | "record_in_processes": "details/record_in_processes", 23 | "record_into_store": "details/record_into_store", 24 | } 25 | 26 | defaults = { 27 | "cpp_guard": None, 28 | "keyed": False, 29 | "expiration": "never", 30 | "bug_numbers": [], 31 | "alert_emails": [], 32 | } 33 | 34 | data = {"details": {}} 35 | 36 | for source_field, target_field in props.items(): 37 | value = None 38 | if hasattr(histogram, source_field): 39 | value = getattr(histogram, source_field)() 40 | elif source_field in histogram._definition: 41 | value = histogram._definition.get(source_field) 42 | elif source_field in defaults: 43 | value = defaults[source_field] 44 | set_in_nested_dict(data, target_field, value) 45 | 46 | # Only include labels if the histogram is categorical. 47 | if histogram.kind() == "categorical": 48 | set_in_nested_dict(data, "details/labels", histogram.labels()) 49 | 50 | # We only care about opt-out or opt-in really. 51 | optout = False 52 | if hasattr(histogram, "dataset"): 53 | optout = getattr(histogram, "dataset")().endswith("_OPTOUT") 54 | 55 | # Use Counters are shipped on release since 65. 56 | # If the parsers would set this flag, we couldn't differentiate between versions. 57 | if int(version) >= 65: 58 | if histogram.name().startswith("USE_COUNTER2_"): 59 | optout = True 60 | 61 | data["optout"] = optout 62 | 63 | # Normalize some field values. 64 | data["expiry_version"] = get_major_version(data["expiry_version"]) 65 | if data["expiry_version"] == "default": 66 | data["expiry_version"] = "never" 67 | if data["details"]["keyed"] == "true": 68 | data["details"]["keyed"] = True 69 | 70 | # TODO: Fixup old non-number values & expressions. 
71 | # History: bug 920169, bug 1245910 72 | # "JS::gcreason::NUM_TELEMETRY_REASONS" 73 | # "JS::gcreason::NUM_TELEMETRY_REASONS+1" 74 | # "mozilla::StartupTimeline::MAX_EVENT_ID" 75 | 76 | return data 77 | 78 | 79 | def transform_probe_info(probes, version): 80 | return dict( 81 | (probe.name(), extract_histogram_data(probe, version)) for probe in probes 82 | ) 83 | 84 | 85 | class HistogramsParser: 86 | def parse(self, filenames, version=None, channel=None): 87 | # Call the histogram tools for each file. 88 | parsed_probes = list(histogram_tools.from_files(filenames)) 89 | 90 | # Get the probe information in a standard format. 91 | return transform_probe_info(parsed_probes, version) 92 | -------------------------------------------------------------------------------- /probe_scraper/parsers/metrics.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from pathlib import Path 6 | 7 | from glean_parser.parser import parse_objects 8 | 9 | from .pings import normalize_ping_name 10 | from .utils import get_source_url 11 | 12 | 13 | class GleanMetricsParser: 14 | """ 15 | Use the [Glean Parser] 16 | (https://mozilla.github.io/glean_parser) 17 | to parse the metrics.yaml files. 18 | """ 19 | 20 | def parse(self, filenames, config, repo_url=None, commit_hash=None): 21 | config = config.copy() 22 | config["do_not_disable_expired"] = True 23 | 24 | paths = [Path(fname) for fname in filenames] 25 | paths = [path for path in paths if path.is_file()] 26 | results = parse_objects(paths, config) 27 | errors = [err for err in results] 28 | 29 | metrics = { 30 | metric.identifier(): metric.serialize() 31 | for category, probes in results.value.items() 32 | for probe_name, metric in probes.items() 33 | } 34 | 35 | for v in metrics.values(): 36 | v["send_in_pings"] = [normalize_ping_name(p) for p in v["send_in_pings"]] 37 | if repo_url and commit_hash: 38 | v["source_url"] = get_source_url(v["defined_in"], repo_url, commit_hash) 39 | # the 'defined_in' structure is no longer needed 40 | del v["defined_in"] 41 | return metrics, errors 42 | -------------------------------------------------------------------------------- /probe_scraper/parsers/pings.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
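# Usage sketch (illustrative only; the repo URL and commit hash below are
# hypothetical). GleanPingsParser, defined further down, is driven the same
# way as the metrics parser above:
#
#     parser = GleanPingsParser()
#     pings, errors = parser.parse(
#         ["pings.yaml"],
#         {"allow_reserved": False},
#         repo_url="https://github.com/mozilla/example",
#         commit_hash="abcdef0",
#     )
#
# repo_url and commit_hash are optional; when given, they are only used to
# attach a source_url to each ping definition. Ping names are normalized via
# PING_NAME_NORMALIZATION, e.g. "deletion_request" becomes "deletion-request".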
4 | 5 | from pathlib import Path 6 | 7 | from glean_parser.parser import parse_objects 8 | 9 | from .utils import get_source_url 10 | 11 | PING_NAME_NORMALIZATION = { 12 | "deletion_request": "deletion-request", 13 | "bookmarks_sync": "bookmarks-sync", 14 | "history_sync": "history-sync", 15 | "session_end": "session-end", 16 | } 17 | 18 | 19 | def normalize_ping_name(name): 20 | return PING_NAME_NORMALIZATION.get(name, name) 21 | 22 | 23 | def generate_definition(ping_data, repo_url, commit_hash): 24 | serialized = ping_data.serialize() 25 | if repo_url and commit_hash: 26 | serialized["source_url"] = get_source_url( 27 | serialized["defined_in"], repo_url, commit_hash 28 | ) 29 | # the 'defined_in' structure is no longer needed 30 | del serialized["defined_in"] 31 | return serialized 32 | 33 | 34 | class GleanPingsParser: 35 | """ 36 | Use the [Glean Parser] 37 | (https://mozilla.github.io/glean_parser) 38 | to parse the pings.yaml files. 39 | """ 40 | 41 | def parse(self, filenames, config, repo_url=None, commit_hash=None): 42 | config = config.copy() 43 | paths = [Path(fname) for fname in filenames] 44 | paths = [path for path in paths if path.is_file()] 45 | results = parse_objects(paths, config) 46 | errors = [err for err in results] 47 | 48 | pings = { 49 | normalize_ping_name(ping_name): generate_definition( 50 | ping_data, repo_url, commit_hash 51 | ) 52 | for category, pings in results.value.items() 53 | for ping_name, ping_data in pings.items() 54 | } 55 | 56 | return pings, errors 57 | -------------------------------------------------------------------------------- /probe_scraper/parsers/repositories.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | import copy 6 | 7 | import yaml 8 | 9 | from probe_scraper import model_validation 10 | 11 | REPOSITORIES_FILENAME = "repositories.yaml" 12 | 13 | 14 | def remove_none(obj): 15 | """ 16 | Recursively traverses a dict or list, removing all dict items where the value 17 | is None. This helps us meet the existing probeinfo API contract and sidesteps 18 | an awkward incompatibility between JSON schemas and OpenAPI schemas, which use 19 | incompatible constructs for marking fields as nullable. 
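For example, remove_none({"url": None, "branch": "main"}) returns {"branch": "main"}; None entries nested in lists and tuples are dropped the same way.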
20 | 21 | Implementation from https://stackoverflow.com/a/20558778 22 | """ 23 | if isinstance(obj, (list, tuple, set)): 24 | return type(obj)(remove_none(x) for x in obj if x is not None) 25 | elif isinstance(obj, dict): 26 | return type(obj)( 27 | (remove_none(k), remove_none(v)) 28 | for k, v in obj.items() 29 | if k is not None and v is not None 30 | ) 31 | else: 32 | return obj 33 | 34 | 35 | class Repository(object): 36 | """ 37 | A class representing a repository, read in from `repositories.yaml` 38 | """ 39 | 40 | def __init__(self, name, definition): 41 | self.name = name 42 | self.url = definition.get("url") 43 | self.branch = definition.get("branch", None) 44 | self.notification_emails = definition.get("notification_emails") 45 | self.app_id = definition.get("app_id") 46 | self.description = definition.get("description") 47 | self.channel = definition.get("channel") 48 | self.deprecated = definition.get("deprecated", False) 49 | self.metrics_file_paths = definition.get("metrics_files", []) 50 | self.ping_file_paths = definition.get("ping_files", []) 51 | self.tag_file_paths = definition.get("tag_files", []) 52 | self.library_names = definition.get("library_names", None) 53 | self.dependencies = definition.get("dependencies", []) 54 | self.prototype = definition.get("prototype", False) 55 | self.retention_days = definition.get("retention_days", None) 56 | self.encryption = definition.get("encryption", None) 57 | self.skip_documentation = definition.get("skip_documentation", False) 58 | self.moz_pipeline_metadata_defaults = definition.get( 59 | "moz_pipeline_metadata_defaults", {} 60 | ) 61 | self.moz_pipeline_metadata = definition.get("moz_pipeline_metadata", {}) 62 | 63 | def get_metrics_file_paths(self): 64 | return self.metrics_file_paths 65 | 66 | def get_ping_file_paths(self): 67 | return self.ping_file_paths 68 | 69 | def get_change_files(self): 70 | return self.metrics_file_paths + self.ping_file_paths + self.tag_file_paths 71 | 72 | def get_dependencies(self): 73 | return self.dependencies 74 | 75 | def to_dict(self): 76 | # Remove null elements 77 | # https://google.github.io/styleguide/jsoncstyleguide.xml#Empty/Null_Property_Values 78 | return {k: v for k, v in list(self.__dict__.items()) if v is not None} 79 | 80 | 81 | class RepositoriesParser(object): 82 | """ 83 | A parser for `repositories.yaml` files, which both validates and retrieves Repository objects 84 | """ 85 | 86 | def _get_repos(self, filename=None): 87 | if filename is None: 88 | filename = REPOSITORIES_FILENAME 89 | 90 | with open(filename, "r") as f: 91 | repos = yaml.load(f, Loader=yaml.SafeLoader) 92 | 93 | version = repos.get("version", "1") 94 | if version == "1": 95 | return repos 96 | else: 97 | return self._v2_to_v1(filename) 98 | 99 | def validate(self, filename=None): 100 | data = self._get_repos(filename) 101 | model_validation.validate_as(data, "RepositoriesYamlV1") 102 | 103 | def parse(self, filename=None): 104 | """ 105 | Parse the given filename as a set of repository definitions for v1 endpoints. 106 | 107 | The passed file can either be in the old RepositoriesYamlV1 format 108 | or the current RepositoriesYamlV2 format, in which case it will be 109 | "downgraded" to v1 format. This is to maintain existing code and output for 110 | the v1 probeinfo endpoints. 111 | 112 | New endpoints should be built with the data format returned from parse_v2 113 | rather than this function. 
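For example, RepositoriesParser().parse() with no argument reads the checked-in repositories.yaml and returns a list of Repository objects, whichever schema version the file uses.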
114 | """ 115 | self.validate(filename) 116 | repos = self._get_repos(filename) 117 | 118 | repos = [ 119 | Repository(name, definition) for name, definition in list(repos.items()) 120 | ] 121 | 122 | return repos 123 | 124 | def parse_v2(self, filename=None) -> dict: 125 | """ 126 | Parse the given filename as a set of repository definitions. 127 | 128 | The passed file must be in the current RepositoriesYamlV2 format. 129 | """ 130 | with open(filename or REPOSITORIES_FILENAME, "r") as f: 131 | data = yaml.load(f, Loader=yaml.SafeLoader) 132 | model_validation.apply_defaults_and_validate(data, "RepositoriesYamlV2") 133 | repos = data 134 | 135 | app_listings = [] 136 | for app in repos["applications"]: 137 | channels = app.pop("channels") 138 | for channel in channels: 139 | dependencies = app.get("dependencies", []) + channel.pop( 140 | "additional_dependencies", [] 141 | ) 142 | listing = {**app, **channel} 143 | listing["dependencies"] = dependencies 144 | app_id = listing["app_id"] 145 | listing["document_namespace"] = ( 146 | app_id.lower().replace("_", "-").replace(".", "-") 147 | ) 148 | listing["bq_dataset_family"] = ( 149 | app_id.lower().replace("-", "_").replace(".", "_") 150 | ) 151 | # Need a deepcopy to ensure the dictionary values remain distinct. 152 | listing = copy.deepcopy(listing) 153 | model_validation.validate_as(listing, "AppListing") 154 | app_listings.append(listing) 155 | 156 | library_variants = [] 157 | for lib in repos["libraries"]: 158 | variants = lib.pop("variants") 159 | for variant in variants: 160 | listing = {**lib, **variant} 161 | model_validation.validate_as(listing, "LibraryVariant") 162 | library_variants.append(listing) 163 | 164 | return { 165 | "library-variants": library_variants, 166 | "app-listings": app_listings, 167 | } 168 | 169 | def _v2_to_v1(self, filename): 170 | repos_v2 = self.parse_v2(filename) 171 | repos = {} 172 | for lib in repos_v2["library-variants"]: 173 | v1_name = lib["v1_name"] 174 | lib["library_names"] = [lib["dependency_name"]] 175 | lib["app_id"] = v1_name 176 | del lib["library_name"] 177 | del lib["dependency_name"] 178 | del lib["v1_name"] 179 | repos[v1_name] = lib 180 | for app in repos_v2["app-listings"]: 181 | app_channel = app.pop("app_channel", None) 182 | if app_channel is not None: 183 | app["channel"] = app_channel 184 | v1_name = app.pop("v1_name") 185 | app.pop("app_name") 186 | app.pop("canonical_app_name", None) 187 | app.pop("bq_dataset_family") 188 | app_description = app.pop("app_description", None) 189 | app["description"] = app.get("description", app_description) 190 | namespace = app.pop("document_namespace") 191 | app["app_id"] = namespace 192 | repos[v1_name] = app 193 | return repos 194 | -------------------------------------------------------------------------------- /probe_scraper/parsers/scalars.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | 5 | from .third_party import parse_scalars 6 | from .utils import get_major_version 7 | 8 | 9 | def extract_scalar_data(s): 10 | 11 | # External scalars.yaml files have release/prerelease, not opt-in/opt-out 12 | try: 13 | optout = s.dataset.endswith("_OPTOUT") 14 | except KeyError: 15 | optout = s._definition.get("collect_on_channels", "prerelease") == "release" 16 | 17 | return { 18 | "description": s.description, 19 | "expiry_version": get_major_version(s.expires), 20 | "cpp_guard": s.cpp_guard, 21 | "optout": optout, 22 | "bug_numbers": s.bug_numbers, 23 | "notification_emails": s.notification_emails, 24 | "details": { 25 | "keyed": s.keyed, 26 | "kind": s.kind, 27 | "record_in_processes": s.record_in_processes, 28 | "record_into_store": s.record_into_store, 29 | }, 30 | } 31 | 32 | 33 | def transform_scalar_info(probes): 34 | return dict((probe.label, extract_scalar_data(probe)) for probe in probes) 35 | 36 | 37 | class ScalarsParser: 38 | def parse(self, filenames, version=None, channel=None): 39 | if len(filenames) > 1: 40 | raise Exception("We don't support loading from more than one file.") 41 | 42 | scalars = parse_scalars.load_scalars(filenames[0], strict_type_checks=False) 43 | 44 | # Get the probe information in a standard format. 45 | return transform_scalar_info(scalars) 46 | -------------------------------------------------------------------------------- /probe_scraper/parsers/tags.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from pathlib import Path 6 | 7 | from glean_parser.parser import parse_objects 8 | 9 | from .utils import get_source_url 10 | 11 | 12 | class GleanTagsParser: 13 | """ 14 | Use the [Glean Parser] 15 | (https://mozilla.github.io/glean_parser) 16 | to parse tags.yaml files. 17 | """ 18 | 19 | def parse(self, filenames, config, repo_url=None, commit_hash=None): 20 | config = config.copy() 21 | paths = [Path(fname) for fname in filenames] 22 | paths = [path for path in paths if path.is_file()] 23 | results = parse_objects(paths, config) 24 | errors = [err for err in results] 25 | tags = { 26 | tag_name: tag_data.serialize() 27 | for tag_name, tag_data in results.value.get("tags", {}).items() 28 | } 29 | 30 | for v in tags.values(): 31 | if repo_url and commit_hash: 32 | v["source_url"] = get_source_url(v["defined_in"], repo_url, commit_hash) 33 | # the 'defined_in' structure is no longer needed 34 | del v["defined_in"] 35 | return tags, errors 36 | -------------------------------------------------------------------------------- /probe_scraper/parsers/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/probe_scraper/parsers/third_party/__init__.py -------------------------------------------------------------------------------- /probe_scraper/parsers/third_party/shared_telemetry_utils.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | 5 | # This file contains utility functions shared by the scalars and the histogram generation 6 | # scripts. 7 | 8 | 9 | import re 10 | import sys 11 | 12 | import yaml 13 | 14 | # This is a list of flags that determine which process a measurement is allowed 15 | # to record from. 16 | KNOWN_PROCESS_FLAGS = { 17 | "all": "All", 18 | "all_children": "AllChildren", 19 | "main": "Main", 20 | "content": "Content", 21 | "gpu": "Gpu", 22 | "socket": "Socket", 23 | # Historical Values 24 | "all_childs": "AllChildren", # Supporting files from before bug 1363725 25 | } 26 | 27 | PROCESS_ENUM_PREFIX = "mozilla::Telemetry::Common::RecordedProcessType::" 28 | 29 | 30 | class ParserError(Exception): 31 | """Thrown by different probe parsers. Errors are partitioned into 32 | 'immediately fatal' and 'eventually fatal' so that the parser can print 33 | multiple error messages at a time. See bug 1401612 .""" 34 | 35 | eventual_errors = [] 36 | 37 | def __init__(self, *args): 38 | Exception.__init__(self, *args) 39 | 40 | def handle_later(self): 41 | ParserError.eventual_errors.append(self) 42 | 43 | def handle_now(self): 44 | ParserError.print_eventuals() 45 | print(self.message, file=sys.stderr) 46 | sys.exit(1) 47 | 48 | @classmethod 49 | def print_eventuals(cls): 50 | while cls.eventual_errors: 51 | print(cls.eventual_errors.pop(0).message, file=sys.stderr) 52 | 53 | @classmethod 54 | def exit_func(cls): 55 | if cls.eventual_errors: 56 | cls("Some errors occurred").handle_now() 57 | 58 | 59 | def is_valid_process_name(name): 60 | return name in KNOWN_PROCESS_FLAGS 61 | 62 | 63 | def process_name_to_enum(name): 64 | return PROCESS_ENUM_PREFIX + KNOWN_PROCESS_FLAGS.get(name) 65 | 66 | 67 | class StringTable: 68 | """Manages a string table and allows C style serialization to a file.""" 69 | 70 | def __init__(self): 71 | self.current_index = 0 72 | self.table = {} 73 | 74 | def c_strlen(self, string): 75 | """The length of a string including the null terminating character. 76 | :param string: the input string. 77 | """ 78 | return len(string) + 1 79 | 80 | def stringIndex(self, string): 81 | """Returns the index in the table of the provided string. Adds the string to 82 | the table if it's not there. 83 | :param string: the input string. 84 | """ 85 | if string in self.table: 86 | return self.table[string] 87 | else: 88 | result = self.current_index 89 | self.table[string] = result 90 | self.current_index += self.c_strlen(string) 91 | return result 92 | 93 | def stringIndexes(self, strings): 94 | """Returns a list of indexes for the provided list of strings. 95 | Adds the strings to the table if they are not in it yet. 96 | :param strings: list of strings to put into the table. 97 | """ 98 | return [self.stringIndex(s) for s in strings] 99 | 100 | def writeDefinition(self, f, name): 101 | """Writes the string table to a file as a C const char array. 102 | 103 | This writes out the string table as one single C char array for memory 104 | size reasons, separating the individual strings with '\0' characters. 105 | This way we can index directly into the string array and avoid the additional 106 | storage costs for the pointers to them (and potential extra relocations for those). 107 | 108 | :param f: the output stream. 109 | :param name: the name of the output array. 110 | """ 111 | entries = list(self.table.items()) 112 | entries.sort(key=lambda x: x[1]) 113 | 114 | # Avoid null-in-string warnings with GCC and potentially 115 | # overlong string constants; write everything out the long way. 
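        # Illustrative example (whitespace approximate): a table holding only
        # the string "hi" at offset 0 serializes as
        #
        #     const char name[] = {
        #       /*     0 - "hi" */ 'h', 'i', '\0',
        #     };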
116 | def explodeToCharArray(string): 117 | def toCChar(s): 118 | if s == "'": 119 | return "'\\''" 120 | else: 121 | return "'%s'" % s 122 | 123 | return ", ".join(map(toCChar, string)) 124 | 125 | f.write("const char %s[] = {\n" % name) 126 | for string, offset in entries: 127 | if "*/" in string: 128 | raise ValueError( 129 | "String in string table contains unexpected sequence '*/': %s" 130 | % string 131 | ) 132 | 133 | e = explodeToCharArray(string) 134 | if e: 135 | f.write( 136 | " /* %5d - \"%s\" */ %s, '\\0',\n" 137 | % (offset, string, e) 138 | ) 139 | else: 140 | f.write(" /* %5d - \"%s\" */ '\\0',\n" % (offset, string)) 141 | f.write("};\n\n") 142 | 143 | 144 | def static_assert(output, expression, message): 145 | """Writes a C++ compile-time assertion expression to a file. 146 | :param output: the output stream. 147 | :param expression: the expression to check. 148 | :param message: the string literal that will appear if the expression evaluates to 149 | false. 150 | """ 151 | print('static_assert(%s, "%s");' % (expression, message), file=output) 152 | 153 | 154 | def validate_expiration_version(expiration): 155 | """Makes sure the expiration version has the expected format. 156 | 157 | Allowed examples: "1.0", "20", "300.0a1", "60.0a1", "30.5a1", "never" 158 | Disallowed examples: "Never", "asd", "4000000", "60a1" 159 | 160 | :param expiration: the expiration version string. 161 | :return: True if the expiration validates correctly, False otherwise. 162 | """ 163 | if expiration != "never" and not re.match(r"^\d{1,3}(\.\d|\.\da1)?$", expiration): 164 | return False 165 | 166 | return True 167 | 168 | 169 | def add_expiration_postfix(expiration): 170 | """Formats the expiration version and adds a version postfix if needed. 171 | 172 | :param expiration: the expiration version string. 173 | :return: the modified expiration string. 174 | """ 175 | if re.match(r"^[1-9][0-9]*$", expiration): 176 | return expiration + ".0a1" 177 | 178 | if re.match(r"^[1-9][0-9]*\.0$", expiration): 179 | return expiration + "a1" 180 | 181 | return expiration 182 | 183 | 184 | def load_yaml_file(filename): 185 | """Load a YAML file from disk, throw a ParserError on failure.""" 186 | try: 187 | with open(filename, "r") as f: 188 | return yaml.safe_load(f) 189 | except IOError as e: 190 | raise ParserError("Error opening " + filename + ": " + str(e)) 191 | except ValueError as e: 192 | raise ParserError( 193 | "Error parsing processes in {}: {}".format(filename, str(e)) 194 | ) 195 | -------------------------------------------------------------------------------- /probe_scraper/parsers/third_party/usecounters.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | import collections 6 | import re 7 | import sys 8 | 9 | 10 | def read_conf(conf_filename): 11 | # Can't read/write from a single StringIO, so make a new one for reading.
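    # Illustrative entries in the syntax accepted below (the names are
    # hypothetical, not real counters):
    #
    #     method Document.exitPointerLock
    #     attribute Window.localStorage
    #     property MozAppearance
    #     custom EXAMPLE_COUNTER description of the counter
    #
    # Blank lines and lines starting with // are skipped.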
12 | stream = open(conf_filename) 13 | 14 | def parse_counters(stream): 15 | for line_num, line in enumerate(stream): 16 | line = line.rstrip("\n") 17 | if not line or line.startswith("//"): 18 | # empty line or comment 19 | continue 20 | m = re.match(r"method ([A-Za-z0-9]+)\.([A-Za-z0-9]+)$", line) 21 | if m: 22 | interface_name, method_name = m.groups() 23 | yield { 24 | "type": "method", 25 | "interface_name": interface_name, 26 | "method_name": method_name, 27 | } 28 | continue 29 | m = re.match(r"attribute ([A-Za-z0-9]+)\.([A-Za-z0-9]+)$", line) 30 | if m: 31 | interface_name, attribute_name = m.groups() 32 | yield { 33 | "type": "attribute", 34 | "interface_name": interface_name, 35 | "attribute_name": attribute_name, 36 | } 37 | continue 38 | m = re.match(r"property ([A-Za-z0-9]+)$", line) 39 | if m: 40 | property_name = m.group(1) 41 | yield {"type": "property", "property_name": property_name} 42 | continue 43 | m = re.match(r"custom ([A-Za-z0-9_]+) (.*)$", line) 44 | if m: 45 | name, desc = m.groups() 46 | yield {"type": "custom", "name": name, "desc": desc} 47 | continue 48 | raise ValueError("error parsing %s at line %d" % (conf_filename, line_num)) 49 | 50 | return parse_counters(stream) 51 | 52 | 53 | def generate_histograms(filename): 54 | # The mapping for use counters to telemetry histograms depends on the 55 | # ordering of items in the dictionary. 56 | items = collections.OrderedDict() 57 | for counter in read_conf(filename): 58 | 59 | def append_counter(name, desc): 60 | items[name] = { 61 | "expires_in_version": "never", 62 | "kind": "boolean", 63 | "description": desc, 64 | } 65 | 66 | def append_counters(name, desc): 67 | append_counter( 68 | "USE_COUNTER2_%s_DOCUMENT" % name, "Whether a document %s" % desc 69 | ) 70 | append_counter("USE_COUNTER2_%s_PAGE" % name, "Whether a page %s" % desc) 71 | 72 | if counter["type"] == "method": 73 | method = "%s.%s" % (counter["interface_name"], counter["method_name"]) 74 | append_counters(method.replace(".", "_").upper(), "called %s" % method) 75 | elif counter["type"] == "attribute": 76 | attr = "%s.%s" % (counter["interface_name"], counter["attribute_name"]) 77 | counter_name = attr.replace(".", "_").upper() 78 | append_counters("%s_getter" % counter_name, "got %s" % attr) 79 | append_counters("%s_setter" % counter_name, "set %s" % attr) 80 | elif counter["type"] == "property": 81 | prop = counter["property_name"] 82 | append_counters( 83 | "PROPERTY_%s" % prop.replace("-", "_").upper(), 84 | "used the '%s' property" % prop, 85 | ) 86 | elif counter["type"] == "custom": 87 | append_counters(counter["name"].upper(), counter["desc"]) 88 | 89 | return items 90 | -------------------------------------------------------------------------------- /probe_scraper/parsers/utils.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | HTTP_HEADERS = { 6 | "user-agent": "probe-scraper/1.0", 7 | } 8 | 9 | 10 | def set_in_nested_dict(dictionary, path, value): 11 | """Set a property in a nested dictionary by specifying a path to it. 
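    Intermediate keys must already exist; only the leaf key is assigned, so
    e.g. set_in_nested_dict({"a": {}}, "a/b/c", 1) raises KeyError("b").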
12 | 13 | A call like e.g.: 14 | set_in_nested_dict(d, "a/b/c", 1) 15 | is equivalent to: 16 | d["a"]["b"]["c"] = 1 17 | """ 18 | keys = path.split("/") 19 | for k in keys[:-1]: 20 | dictionary = dictionary[k] 21 | dictionary[keys[-1]] = value 22 | 23 | 24 | def get_major_version(version): 25 | """Extracts the major (leftmost) version of a version string. 26 | 27 | :param version: the version string (e.g. "53.1") 28 | :return: a string containing the leftmost number before the first dot 29 | """ 30 | return version.split(".")[0] 31 | 32 | 33 | def get_source_url(glean_definition, repo_url, commit_hash): 34 | """Add source URL where metrics and pings are defined.""" 35 | line_number = glean_definition["line"] 36 | file_path = glean_definition["filepath"][ 37 | glean_definition["filepath"].find(commit_hash) : # noqa: E203 38 | ] 39 | return f"{repo_url}/blob/{file_path}#L{line_number}" 40 | -------------------------------------------------------------------------------- /probe_scraper/remote_storage.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import gzip 3 | import subprocess 4 | from pathlib import Path 5 | from tempfile import TemporaryDirectory 6 | from typing import List, Optional, Tuple, Union 7 | 8 | from .exc import ProbeScraperServerError 9 | 10 | TEXT_HTML = "text/html" 11 | APPLICATION_JSON = "application/json" 12 | INDEX_HTML = "index.html" 13 | 14 | 15 | def _call(args: List[str]): 16 | process = subprocess.run( 17 | args, 18 | stdout=subprocess.PIPE, 19 | stderr=subprocess.STDOUT, 20 | text=True, 21 | ) 22 | if process.returncode == 0: 23 | print(process.stdout, end="") 24 | else: 25 | raise ProbeScraperServerError( 26 | f"Command {args!r} returned non-zero exit status {process.returncode}: " 27 | + process.stdout 28 | ) 29 | 30 | 31 | def _s3_sync( 32 | src: Union[str, Path], 33 | dst: Union[str, Path], 34 | delete: bool = False, 35 | exclude: Tuple[str, ...] = (), 36 | acl: Optional[str] = None, 37 | content_type: Optional[str] = None, 38 | content_encoding: Optional[str] = None, 39 | cache_control: Optional[str] = None, 40 | ): 41 | # must use sync for dirs and cp for files 42 | if isinstance(src, Path) and src.is_file(): 43 | # must upload files with cp 44 | s3_cmd = "cp" 45 | else: 46 | s3_cmd = "sync" 47 | 48 | _call( 49 | ["aws", "s3", s3_cmd, "--only-show-errors", str(src), str(dst)] 50 | + (["--delete"] if delete else []) 51 | + [ 52 | arg 53 | for key, value in zip( 54 | ( 55 | *("--exclude" for _ in exclude), 56 | "--content-type", 57 | "--content-encoding", 58 | "--cache-control", 59 | "--acl", 60 | ), 61 | ( 62 | *exclude, 63 | content_type, 64 | content_encoding, 65 | cache_control, 66 | acl, 67 | ), 68 | ) 69 | if value is not None 70 | for arg in (key, value) 71 | ] 72 | ) 73 | 74 | 75 | def _gcs_sync( 76 | src: Union[str, Path], 77 | dst: Union[str, Path], 78 | delete: bool = False, 79 | exclude: Tuple[str, ...] 
= (), 80 | content_type: Optional[str] = None, 81 | content_encoding: Optional[str] = None, 82 | cache_control: Optional[str] = None, 83 | acl: Optional[str] = None, 84 | ): 85 | if isinstance(src, Path) and src.is_file(): 86 | # must upload files with cp 87 | gsutil_cmd = ["cp"] 88 | if delete: 89 | raise ValueError("cannot delete when uploading a single file") 90 | if exclude: 91 | raise ValueError("cannot exclude when uploading a single file") 92 | else: 93 | gsutil_cmd = ["rsync", "-r"] 94 | 95 | _call( 96 | ["gsutil", "-q", "-m"] 97 | # -h flags are global and must appear before the rsync/cp command 98 | + [ 99 | arg 100 | for header, value in zip( 101 | ["Content-Type", "Content-Encoding", "Cache-Control"], 102 | [content_type, content_encoding, cache_control], 103 | ) 104 | if value is not None 105 | for arg in ("-h", f"{header}:{value}") 106 | ] 107 | + gsutil_cmd 108 | # command specific options must appear before src and dst 109 | + (["-d"] if delete else []) 110 | # translate excludes from glob to regex before passing to gsutil 111 | + [arg for item in exclude for arg in ("-x", fnmatch.translate(item))] 112 | + (["-a", acl] if acl is not None else []) 113 | + [str(src), str(dst)] 114 | ) 115 | 116 | 117 | def _get_sync_function(remote: str): 118 | if remote.startswith("s3://"): 119 | return _s3_sync 120 | elif remote.startswith("gs://"): 121 | return _gcs_sync 122 | else: 123 | raise ValueError( 124 | f"remote path must have scheme like s3:// or gs://, got: {remote!r}" 125 | ) 126 | 127 | 128 | def remote_storage_pull(src: str, dst: Path, decompress: bool = False): 129 | sync = _get_sync_function(src) 130 | if sync is _gcs_sync: 131 | # gsutil will decompress files 132 | decompress = False 133 | # prevent error from gsutil when dst and src do not exist 134 | dst.mkdir(parents=True, exist_ok=True) 135 | 136 | if decompress: 137 | with TemporaryDirectory() as tmp: 138 | tmp_path = Path(tmp) 139 | sync(src, tmp_path) 140 | for in_file in tmp_path.rglob("*"): 141 | if not in_file.is_dir(): 142 | out_file = dst / in_file.relative_to(tmp_path) 143 | out_file.parent.mkdir(parents=True, exist_ok=True) 144 | out_file.write_bytes(gzip.decompress(in_file.read_bytes())) 145 | else: 146 | sync(src, dst) 147 | 148 | 149 | def remote_storage_push(src: Path, dst: str, compress: bool = False, **kwargs): 150 | sync = _get_sync_function(dst) 151 | if compress: 152 | kwargs["content_encoding"] = "gzip" 153 | if "exclude" in kwargs: 154 | raise NotImplementedError("exclude is not supported while compressing") 155 | # cloudfront is supposed to automatically gzip objects, but it won't do that 156 | # if the object size is > 10 megabytes (https://webmasters.stackexchange.com/a/111734) 157 | # which our files sometimes are. 
to work around this, as well as to support google 158 | # cloud storage, we'll gzip the contents into a temporary directory, and upload that 159 | # with a special content encoding 160 | with TemporaryDirectory() as tmp_name: 161 | tmp = Path(tmp_name) 162 | if src.is_dir(): 163 | for in_file in src.rglob("*"): 164 | if not in_file.is_dir(): 165 | out_file = tmp / in_file.relative_to(src) 166 | out_file.parent.mkdir(parents=True, exist_ok=True) 167 | out_file.write_bytes(gzip.compress(in_file.read_bytes())) 168 | index = tmp / INDEX_HTML 169 | if index.exists(): 170 | # must be a tuple 171 | kwargs["exclude"] = (INDEX_HTML,) 172 | sync( 173 | src=tmp, 174 | dst=dst, 175 | content_type=APPLICATION_JSON, 176 | **kwargs, 177 | ) 178 | if index.exists(): 179 | # cannot delete or exclude with a single file 180 | kwargs["delete"] = False 181 | kwargs["exclude"] = () 182 | sync( 183 | src=index, 184 | dst=dst, 185 | content_type=TEXT_HTML, 186 | **kwargs, 187 | ) 188 | else: 189 | tmp_file = tmp / src.name 190 | tmp_file.write_bytes(gzip.compress(src.read_bytes())) 191 | content_type = TEXT_HTML if src.name == INDEX_HTML else APPLICATION_JSON 192 | sync( 193 | src=tmp_file, 194 | dst=dst, 195 | content_type=content_type, 196 | **kwargs, 197 | ) 198 | else: 199 | sync(src, dst, **kwargs) 200 | -------------------------------------------------------------------------------- /probe_scraper/scrapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/probe_scraper/scrapers/__init__.py -------------------------------------------------------------------------------- /probe_scraper/scrapers/buildhub.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | import re 3 | from datetime import datetime 4 | 5 | import requests 6 | 7 | 8 | class NoDataFoundException(Exception): 9 | pass 10 | 11 | 12 | class Buildhub(object): 13 | 14 | search_url = "https://buildhub.moz.tools/api/search" 15 | default_window = 1000 16 | 17 | date_formats = ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S.%f") 18 | 19 | def _paginate_revision_dates( 20 | self, 21 | iteration, 22 | channel, 23 | min_version, 24 | product, 25 | locale, 26 | platform, 27 | max_version, 28 | verbose, 29 | window, 30 | ): 31 | query_str = [ 32 | {"term": {"source.product": product}}, 33 | {"term": {"target.channel": channel}}, 34 | {"term": {"target.locale": locale}}, 35 | {"term": {"target.platform": platform}}, 36 | ] 37 | 38 | # See: "99" > "65" == True, "100" > "65" == False 39 | # FIXME: This breaks if we get to v200 40 | # If we only need versions above 99 we restrict it to versions below 200, 41 | # then we're good for a bunch of versions. 42 | if min_version >= 100: 43 | query_str.append({"range": {"target.version": {"gte": str(min_version)}}}) 44 | if max_version is None: 45 | # This works because the minimum we ever ask for is v30. 46 | query_str.append({"range": {"target.version": {"lt": "200"}}}) 47 | else: 48 | # If the user didn't set a max version we need to explicitly include v100..v200 here. 
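            # Concretely, the pitfall is lexicographic string ordering in the
            # Elasticsearch range filter (values below are illustrative):
            #
            #     "99" > "65"    # True  -- happens to look numeric
            #     "100" > "65"   # False -- "1" sorts before "6"
            #
            # so a plain gte filter on a two-digit min_version would silently
            # drop v100+ builds; the extra "should" clause below adds the
            # 100 <= version < 200 band back in.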
49 | if max_version is None: 50 | query_str.append( 51 | { 52 | "bool": { 53 | "should": [ 54 | { 55 | "range": { 56 | "target.version": {"gte": str(min_version)} 57 | } 58 | }, 59 | { 60 | "bool": { 61 | "must": [ 62 | { 63 | "range": { 64 | "target.version": {"gte": "100"} 65 | } 66 | }, 67 | { 68 | "range": { 69 | "target.version": {"lt": "200"} 70 | } 71 | }, 72 | ] 73 | } 74 | }, 75 | ] 76 | } 77 | } 78 | ) 79 | else: 80 | # Otherwise we only check the min version, 81 | # the max version check will be appended 82 | query_str.append( 83 | {"range": {"target.version": {"gte": str(min_version)}}} 84 | ) 85 | 86 | if max_version is not None: 87 | query_str.append( 88 | { 89 | "bool": { 90 | "should": [ 91 | {"range": {"target.version": {"lte": str(max_version)}}}, 92 | {"prefix": {"target.version": str(max_version)}}, 93 | ] 94 | } 95 | } 96 | ) 97 | 98 | body = {"query": {"bool": {"filter": query_str}}, "size": window} 99 | 100 | if iteration != 0: 101 | body["from"] = iteration * window 102 | 103 | if verbose: 104 | print("------QUERY STRING------\n") 105 | pprint.pprint(body) 106 | 107 | response = requests.post(url=Buildhub.search_url, json=body) 108 | data = response.json() 109 | 110 | if verbose: 111 | print("------QUERY RESULTS------\n") 112 | pprint.pprint(data) 113 | 114 | return data 115 | 116 | def _distinct_and_clean(self, records): 117 | """ 118 | For more information on the schema of the records, 119 | see the Buildhub API documentation: 120 | https://buildhub.readthedocs.io/en/latest/api.html#more-about-the-data-schema 121 | """ 122 | cleaned_records = {} 123 | 124 | for record in records: 125 | # %:z not supported, see https://bugs.python.org/msg169952 126 | # Drop the tz portion entirely 127 | d = record["_source"]["download"]["date"] 128 | if re.search(r"\+\d{2}:\d{2}$", d): 129 | d = d[:-6] 130 | 131 | date = None 132 | try: 133 | date = datetime.strptime(d, self.date_formats[0]) 134 | except ValueError: 135 | pass 136 | 137 | if date is None: 138 | date = datetime.strptime(d, self.date_formats[1]) 139 | 140 | entry = { 141 | "date": date, 142 | "revision": record["_source"]["source"]["revision"], 143 | "version": record["_source"]["target"]["version"], 144 | "tree": record["_source"]["source"]["tree"], 145 | } 146 | 147 | revision = entry["revision"] 148 | min_entry = entry 149 | 150 | if revision in cleaned_records: 151 | if cleaned_records[revision] != entry: 152 | min_entry = min( 153 | (entry, cleaned_records[revision]), key=lambda x: x["date"] 154 | ) 155 | 156 | cleaned_records[revision] = min_entry 157 | 158 | return sorted(cleaned_records.values(), key=lambda x: x["date"]) 159 | 160 | def get_revision_dates( 161 | self, 162 | channel, 163 | min_version, 164 | product="firefox", 165 | locale="en-US", 166 | platform="win64", 167 | max_version=None, 168 | verbose=False, 169 | window=500, 170 | ): 171 | """ 172 | Retrieve the revisions and publish-dates for a given filter set. 173 | The combination of channel, product, local, and platform almost 174 | gives a set of unique (revision, publication-dates). For example, 175 | `win64` includes x86 and arm-64 builds. As such we de-duplicate 176 | the result set and include the build with the earliest publication 177 | date. 
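        Example (illustrative; the "release"/62 values mirror this repo's
        tests, and the record shape is documented below):

            bh = Buildhub()
            releases = bh.get_revision_dates("release", 62, max_version=62)
            releases[0]["revision"]  # -> a mozilla-release changeset hash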
178 | 179 | Tree is the source tree, usually one of: 180 | - mozilla-central 181 | - mozilla-beta 182 | - mozilla-release 183 | 184 | :param channel: The release channel 185 | :param min_version: The minimum version to include 186 | :param product: Defaults to firefox 187 | :param locale: Defaults to en-US 188 | :param platform: Defaults to win64 189 | :param max_version: Optional maximum version to include 190 | :param verbose: Verbose output of query string and results 191 | :param window: Number of records to retrieve at a time 192 | 193 | returns a list of records of type 194 | { 195 | "date": 196 | "revision": , 197 | "version": , 198 | "tree": 199 | } 200 | """ 201 | 202 | # Because "100" > "99" == False we special-case v100 to v199. 203 | # v200 is far out, so we just ignore that for now. 204 | assert min_version < 200, "Only versions below 200 are supported" 205 | 206 | total_hits = 0 207 | results = [] 208 | 209 | for i in range(2**20): 210 | data = self._paginate_revision_dates( 211 | i, 212 | channel, 213 | min_version, 214 | product, 215 | locale, 216 | platform, 217 | max_version, 218 | verbose, 219 | window, 220 | ) 221 | 222 | # hits/total gives total number of records, including 223 | # those outside the window. We need to know the number 224 | # inside the window. 225 | hits = len(data["hits"]["hits"]) 226 | 227 | if hits: 228 | total_hits += hits 229 | results.append(data) 230 | 231 | # optimization, removes the last no-result window 232 | if hits < window: 233 | break 234 | 235 | if total_hits == 0: 236 | raise NoDataFoundException( 237 | "No data found for channel {} and minimum \ 238 | version {}".format( 239 | channel, min_version 240 | ) 241 | ) 242 | 243 | all_records = [ 244 | record for result in results for record in result["hits"]["hits"] 245 | ] 246 | return self._distinct_and_clean(all_records) 247 | -------------------------------------------------------------------------------- /probe_scraper/scrapers/moz_central_scraper.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
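# Sketch of how a registry file is fetched in download_files() below, with
# assumed example values (the node hash is the artificial-tag one from
# CHANNELS; any real changeset id works the same way):
#
#     channel = "nightly"
#     node = "fd2934cca1ae7b492f29a4d240915aa9ec5b4977"
#     rel_path = "toolkit/components/telemetry/Histograms.json"
#     uri = f"{CHANNELS[channel]['base_uri']}/raw-file/{node}/{rel_path}"
#     # -> https://hg.mozilla.org/mozilla-central/raw-file/fd2934cc.../...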
4 | 5 | import json 6 | import os 7 | import re 8 | from collections import defaultdict 9 | 10 | import requests 11 | 12 | from ..parsers.utils import HTTP_HEADERS 13 | from .buildhub import Buildhub 14 | 15 | BASE_URI = "https://hg.mozilla.org" 16 | 17 | REGISTRY_FILES = { 18 | "histogram": [ 19 | "toolkit/components/telemetry/Histograms.json", 20 | "dom/base/UseCounters.conf", 21 | "dom/base/nsDeprecatedOperationList.h", 22 | "servo/components/style/properties/counted_unknown_properties.py", 23 | "devtools/shared/css/generated/properties-db.js", 24 | ], 25 | "scalar": [ 26 | "toolkit/components/telemetry/Scalars.yaml", 27 | ], 28 | "event": [ 29 | "toolkit/components/telemetry/Events.yaml", 30 | ], 31 | } 32 | 33 | 34 | CHANNELS = { 35 | "nightly": { 36 | "base_uri": f"{BASE_URI}/mozilla-central", 37 | "tag_regex": "^FIREFOX_(AURORA|BETA)_[0-9]+_BASE$", 38 | "artificial_tags": [ 39 | { 40 | "date": [1567362726.0, 0], 41 | "node": "fd2934cca1ae7b492f29a4d240915aa9ec5b4977", 42 | "tag": "FIREFOX_BETA_71_BASE", 43 | } 44 | ], 45 | }, 46 | "beta": { 47 | "base_uri": f"{BASE_URI}/releases/mozilla-beta", 48 | "tag_regex": "^FIREFOX_BETA_[0-9]+_BASE$", 49 | }, 50 | "release": { 51 | "base_uri": f"{BASE_URI}/releases/mozilla-release", 52 | "tag_regex": "^FIREFOX_[0-9]+_0_RELEASE$", 53 | }, 54 | } 55 | 56 | SKIP_REVISIONS = { 57 | "942c201b1ac7a46a449f1fb80da7b050ec0ea120", 58 | "1807a36ff99f01abca1c37442fb5b344465bfbdf", 59 | "30bdee9799a07b8770719aa868416174ff0c54f5", 60 | "9fb70b4ae59336b805a1651e7c57c6385cca0717", 61 | "81578db6bf8939678d490b69f0daf4b675027e3a", 62 | "b8567457ece9593ddb00344130597698145bdc5c", 63 | "c4bdea458a08b975ffd70faed4a2f6fbe1e563bc", 64 | "d420f9190e2f35e314aa67ee346650f86451792c", 65 | "a680e8cd9618f4afbbb148ad464824cd6ce558d9", 66 | "5cbd3d92a78c54b324b6009a25d196adaa8a669b", 67 | "75c1403f58f79d1abd43d33fdd1beb36db9367c6", 68 | "cafaf813b0a938a197a488e629883770b2d33393", 69 | "cbbf6a7e34a363b39107b60dddac2aa713eaa8b5", 70 | } 71 | 72 | MIN_FIREFOX_VERSION = 30 73 | ERROR_CACHE_FILENAME = "probe_scraper_errors_cache.json" 74 | ARTIFICIAL_TAG = "artificial" 75 | 76 | 77 | def extract_major_version(version_str): 78 | """ 79 | Given a version string, e.g. "62.0a1", 80 | extract the major version as an int. 81 | """ 82 | search = re.search(r"^(\d+)\.", version_str) 83 | if search is not None: 84 | return int(search.group(1)) 85 | else: 86 | raise Exception("Invalid version string " + version_str) 87 | 88 | 89 | def relative_path_is_in_version(rel_path, version): 90 | # The devtools file exists in a bunch of versions, but we only care for it 91 | # since firefox 71 (bug 1578661). 
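    # For example:
    #     relative_path_is_in_version(
    #         "devtools/shared/css/generated/properties-db.js", 70)   # False
    #     relative_path_is_in_version(
    #         "devtools/shared/css/generated/properties-db.js", 71)   # True
    #     relative_path_is_in_version(
    #         "toolkit/components/telemetry/Histograms.json", 30)     # True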
92 | if ( 93 | rel_path == "devtools/shared/css/generated/properties-db.js" 94 | or rel_path == "servo/components/style/properties/counted_unknown_properties.py" 95 | ): 96 | return version >= 71 97 | return True 98 | 99 | 100 | def download_files(channel, node, temp_dir, error_cache, version, tree=None): 101 | if tree is None: 102 | uri = CHANNELS[channel]["base_uri"] 103 | else: 104 | # mozilla-release and mozilla-beta need to be prefixed with "release/" 105 | # sometimes they aren't from buildhub, add them if they are missing 106 | if not tree.startswith("releases/") and tree != "mozilla-central": 107 | tree = f"releases/{tree}" 108 | uri = f"{BASE_URI}/{tree}" 109 | 110 | base_uri = f"{uri}/raw-file/{node}/" 111 | node_path = os.path.join(temp_dir, "hg", node) 112 | 113 | results = {} 114 | 115 | def add_result(ptype, disk_path): 116 | if ptype not in results: 117 | results[ptype] = [] 118 | results[ptype].append(disk_path) 119 | 120 | all_files = [(k, x) for k, l in list(REGISTRY_FILES.items()) for x in l] 121 | for ptype, rel_path in all_files: 122 | disk_path = os.path.join(node_path, rel_path) 123 | if os.path.exists(disk_path): 124 | add_result(ptype, disk_path) 125 | continue 126 | 127 | uri = base_uri + rel_path 128 | # requests_cache doesn't cache on error status codes. 129 | # We just use our own cache for these for now. 130 | if uri in error_cache: 131 | continue 132 | 133 | if not relative_path_is_in_version(rel_path, int(version)): 134 | continue 135 | 136 | req = requests.get(uri, headers=HTTP_HEADERS) 137 | if req.status_code != requests.codes.ok: 138 | if os.path.basename(rel_path) == "Histograms.json": 139 | raise Exception( 140 | "Request returned status " + str(req.status_code) + " for " + uri 141 | ) 142 | else: 143 | error_cache[uri] = req.status_code 144 | continue 145 | 146 | dir = os.path.split(disk_path)[0] 147 | if not os.path.exists(dir): 148 | os.makedirs(dir) 149 | with open(disk_path, "wb") as f: 150 | for chunk in req.iter_content(chunk_size=128): 151 | f.write(chunk) 152 | 153 | add_result(ptype, disk_path) 154 | 155 | return results 156 | 157 | 158 | def load_error_cache(folder): 159 | path = os.path.join(folder, ERROR_CACHE_FILENAME) 160 | if not os.path.exists(path): 161 | return {} 162 | with open(path, "r") as f: 163 | return json.load(f) 164 | 165 | 166 | def save_error_cache(folder, error_cache): 167 | path = os.path.join(folder, ERROR_CACHE_FILENAME) 168 | with open(path, "w") as f: 169 | json.dump(error_cache, f, sort_keys=True, indent=2, separators=(",", ": ")) 170 | 171 | 172 | def scrape_channel_revisions( 173 | folder=None, min_fx_version=None, max_fx_version=None, channels=None 174 | ): 175 | """ 176 | Returns data in the format: 177 | { 178 | : { 179 | : { 180 | "date": , 181 | "version": , 182 | "registries": { 183 | "histogram": [path, ...], 184 | "event": [path, ...], 185 | "scalar": [path, ...] 186 | } 187 | } 188 | }, 189 | ... 
190 | } 191 | """ 192 | if min_fx_version is None: 193 | min_fx_version = MIN_FIREFOX_VERSION 194 | 195 | error_cache = load_error_cache(folder) 196 | bh = Buildhub() 197 | results = defaultdict(dict) 198 | 199 | if channels is None: 200 | channels = CHANNELS.keys() 201 | 202 | for channel in channels: 203 | print("\nRetreiving Buildhub results for channel " + channel) 204 | 205 | revision_dates = [ 206 | rd 207 | for rd in bh.get_revision_dates( 208 | channel, min_fx_version, max_version=max_fx_version 209 | ) 210 | if rd["revision"] not in SKIP_REVISIONS 211 | ] 212 | num_revisions = len(revision_dates) 213 | 214 | print(" " + str(num_revisions) + " revisions found") 215 | 216 | for i, rd in enumerate(revision_dates): 217 | revision = rd["revision"] 218 | 219 | print( 220 | ( 221 | f" Downloading files for revision number {str(i+1)}/{str(num_revisions)}" 222 | f" - revision: {revision}, tree: {rd['tree']}, version: {str(rd['version'])}" 223 | ) 224 | ) 225 | version = extract_major_version(rd["version"]) 226 | files = download_files( 227 | channel, revision, folder, error_cache, version, tree=rd["tree"] 228 | ) 229 | 230 | results[channel][revision] = { 231 | "date": rd["date"], 232 | "version": version, 233 | "registries": files, 234 | } 235 | save_error_cache(folder, error_cache) 236 | 237 | return results 238 | -------------------------------------------------------------------------------- /probe_scraper/transform_revisions.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | from collections import defaultdict 6 | 7 | 8 | def transform(node_data): 9 | results = defaultdict(dict) 10 | for channel, nodes in node_data.items(): 11 | for node_id, details in nodes.items(): 12 | results[channel][node_id] = { 13 | "version": details.get("version"), 14 | "date": details.get("date"), 15 | } 16 | 17 | return results 18 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | web_dependency: mark a test that requires a web connection. 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | awscli==1.29.7 2 | beautifulsoup4==4.8.2 3 | GitPython==3.1.41 4 | boto3==1.28.7 5 | Flask==2.3.3 6 | glean-parser~=17.1.0 7 | google-cloud-bigquery==3.23.1 8 | google-cloud-storage==2.2.1 9 | gsutil==5.28 10 | Jinja2==3.1.6 11 | jsonschema==3.1.1 12 | python-dateutil==2.8.0 13 | PyYAML==6.0.1 14 | requests==2.32.0 15 | requests_cache==0.5.2 16 | requests_file==1.4.3 17 | schema==0.7.1 18 | urllib3==1.26.19 19 | Werkzeug==2.3.8 20 | yamllint 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name="probe-scraper", 7 | version="0.1", 8 | description="Scrape metric data from Mozilla products repositories.", 9 | author="Mozilla", 10 | # While this is not owned by the Glean team, I could not find a better 11 | # email address for this. 
12 | author_email="glean-team@mozilla.com", 13 | classifiers=[ 14 | "Intended Audience :: Developers", 15 | "Natural Language :: English", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | ], 21 | url="https://github.com/mozilla/probe-scraper/", 22 | packages=["probe_scraper"], 23 | license="MPL 2.0", 24 | ) 25 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | flake8==7.0.0 2 | pytest>=3.0 3 | black==24.4.2 4 | isort==5.13.2 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/probe-scraper/0506e31f38e61ddc662c0eab18826b370314896e/tests/__init__.py -------------------------------------------------------------------------------- /tests/resources/Histograms.json: -------------------------------------------------------------------------------- 1 | { 2 | "TELEMETRY_TEST_FLAG": { 3 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 4 | "expires_in_version": "never", 5 | "kind": "flag", 6 | "description": "a testing histogram; not meant to be touched" 7 | }, 8 | "TELEMETRY_TEST_COUNT": { 9 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 10 | "expires_in_version": "never", 11 | "kind": "count", 12 | "description": "a testing histogram; not meant to be touched" 13 | }, 14 | "TELEMETRY_TEST_COUNT2": { 15 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 16 | "expires_in_version": "never", 17 | "kind": "count", 18 | "bug_numbers": [1288745], 19 | "description": "a testing histogram; not meant to be touched" 20 | }, 21 | "TELEMETRY_TEST_COUNT_INIT_NO_RECORD": { 22 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 23 | "expires_in_version": "never", 24 | "kind": "count", 25 | "description": "a testing histogram; not meant to be touched - initially not recording" 26 | }, 27 | "TELEMETRY_TEST_CATEGORICAL": { 28 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 29 | "bug_numbers": [1188888], 30 | "expires_in_version": "never", 31 | "kind": "categorical", 32 | "labels": [ 33 | "CommonLabel", 34 | "Label2", 35 | "Label3" 36 | ], 37 | "description": "a testing histogram; not meant to be touched" 38 | }, 39 | "TELEMETRY_TEST_CATEGORICAL_OPTOUT": { 40 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 41 | "bug_numbers": [1188888], 42 | "expires_in_version": "never", 43 | "releaseChannelCollection": "opt-out", 44 | "kind": "categorical", 45 | "labels": [ 46 | "CommonLabel", 47 | "Label4", 48 | "Label5", 49 | "Label6" 50 | ], 51 | "description": "a testing histogram; not meant to be touched" 52 | }, 53 | "TELEMETRY_TEST_CATEGORICAL_NVALUES": { 54 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 55 | "bug_numbers": [1188888], 56 | "expires_in_version": "never", 57 | "kind": "categorical", 58 | "n_values": 70, 59 | "labels": [ 60 | "CommonLabel", 61 | "Label7", 62 | "Label8" 63 | ], 64 | "description": "a testing histogram; not meant to be touched" 65 | }, 66 | "TELEMETRY_TEST_CATEGORICAL_EMPTY_LABELS": { 67 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 68 | "bug_numbers": [1188888], 69 | "expires_in_version": "never", 70 | "kind": "categorical", 71 | "labels": [ 72 | ], 73 | "description": "a testing histogram; 
not meant to be touched" 74 | }, 75 | "TELEMETRY_TEST_KEYED_COUNT_INIT_NO_RECORD": { 76 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 77 | "expires_in_version": "never", 78 | "kind": "count", 79 | "keyed": true, 80 | "description": "a testing histogram; not meant to be touched - initially not recording" 81 | }, 82 | "TELEMETRY_TEST_KEYED_FLAG": { 83 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 84 | "expires_in_version": "never", 85 | "kind": "flag", 86 | "keyed": true, 87 | "description": "a testing histogram; not meant to be touched" 88 | }, 89 | "TELEMETRY_TEST_KEYED_COUNT": { 90 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 91 | "expires_in_version": "never", 92 | "kind": "count", 93 | "keyed": true, 94 | "description": "a testing histogram; not meant to be touched" 95 | }, 96 | "TELEMETRY_TEST_KEYED_BOOLEAN": { 97 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 98 | "expires_in_version": "never", 99 | "kind": "boolean", 100 | "keyed": true, 101 | "bug_numbers": [1299144], 102 | "description": "a testing histogram; not meant to be touched" 103 | }, 104 | "TELEMETRY_TEST_RELEASE_OPTOUT": { 105 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 106 | "expires_in_version": "never", 107 | "kind": "flag", 108 | "releaseChannelCollection": "opt-out", 109 | "description": "a testing histogram; not meant to be touched" 110 | }, 111 | "TELEMETRY_TEST_RELEASE_OPTIN": { 112 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 113 | "expires_in_version": "never", 114 | "kind": "flag", 115 | "releaseChannelCollection": "opt-in", 116 | "description": "a testing histogram; not meant to be touched" 117 | }, 118 | "TELEMETRY_TEST_KEYED_RELEASE_OPTIN": { 119 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 120 | "expires_in_version": "never", 121 | "kind": "flag", 122 | "keyed": true, 123 | "releaseChannelCollection": "opt-in", 124 | "description": "a testing histogram; not meant to be touched" 125 | }, 126 | "TELEMETRY_TEST_KEYED_RELEASE_OPTOUT": { 127 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 128 | "expires_in_version": "never", 129 | "kind": "flag", 130 | "keyed": true, 131 | "releaseChannelCollection": "opt-out", 132 | "description": "a testing histogram; not meant to be touched" 133 | }, 134 | "TELEMETRY_TEST_EXPONENTIAL": { 135 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 136 | "expires_in_version": "never", 137 | "kind": "exponential", 138 | "low": 1, 139 | "high": 2147483646, 140 | "n_buckets": 10, 141 | "bug_numbers": [1288745], 142 | "description": "a testing histogram; not meant to be touched" 143 | }, 144 | "TELEMETRY_TEST_LINEAR": { 145 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 146 | "expires_in_version": "never", 147 | "kind": "linear", 148 | "low": 1, 149 | "high": 2147483646, 150 | "n_buckets": 10, 151 | "bug_numbers": [1288745], 152 | "description": "a testing histogram; not meant to be touched" 153 | }, 154 | "TELEMETRY_TEST_BOOLEAN": { 155 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 156 | "expires_in_version" : "never", 157 | "kind": "boolean", 158 | "bug_numbers": [1288745], 159 | "description": "a testing histogram; not meant to be touched" 160 | }, 161 | "TELEMETRY_TEST_EXPIRED": { 162 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 163 | "expires_in_version": "4.0a1", 164 | "kind": "flag", 165 | "description": "a testing histogram; not meant to be touched" 166 | }, 167 | "TELEMETRY_TEST_ALL_CHILDREN": { 168 | "record_in_processes": ["all_children"], 169 | "alert_emails": 
["telemetry-client-dev@mozilla.com"], 170 | "expires_in_version": "never", 171 | "kind": "linear", 172 | "low": 1, 173 | "high": 10000, 174 | "n_buckets": 10, 175 | "bug_numbers": [1363725], 176 | "description": "a testing histogram; not meant to be touched" 177 | }, 178 | "TELEMETRY_TEST_ALL_CHILDS": { 179 | "record_in_processes": ["all_childs"], 180 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 181 | "expires_in_version": "never", 182 | "kind": "linear", 183 | "low": 1, 184 | "high": 10000, 185 | "n_buckets": 10, 186 | "bug_numbers": [1335343,1363725], 187 | "description": "a testing histogram; not meant to be touched" 188 | }, 189 | "EXPRESSION_IN_LOW_HIGH_ATTRIBUTE": { 190 | "expires_in_version": "never", 191 | "kind": "exponential", 192 | "low": "32 * 1024", 193 | "high": "16 * 1024 * 1024", 194 | "n_buckets": 200, 195 | "extended_statistics_ok": true, 196 | "description": "Test Case for expression in low/high attribute" 197 | }, 198 | "NON_INTEGER_IN_HIGH_ATTRIBUTE": { 199 | "expires_in_version": "never", 200 | "kind": "exponential", 201 | "description": "Test Case for non-integer in high attribute", 202 | "high": "5000", 203 | "n_buckets": 10, 204 | "extended_statistics_ok": true 205 | }, 206 | "HISTOGRAM_WITH_MULTISTORE": { 207 | "alert_emails": ["telemetry-client-dev@mozilla.com"], 208 | "expires_in_version": "never", 209 | "kind": "linear", 210 | "low": 1, 211 | "high": 10000, 212 | "n_buckets": 10, 213 | "bug_numbers": [1335343,1363725], 214 | "description": "a testing histogram; not meant to be touched", 215 | "record_into_store": ["main", "store2"] 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /tests/resources/UseCounters.conf: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | // This file defines a list of use counters, which are things that can 6 | // record usage of Web platform features and then report this information 7 | // through Telemetry. 8 | // 9 | // The format of this file is very strict. Each line can be: 10 | // 11 | // (a) a blank line 12 | // 13 | // (b) a comment, which is a line that begins with "//" 14 | // 15 | // (c) one of three possible use counter declarations: 16 | // 17 | // method . 18 | // attribute . 19 | // property 20 | // 21 | // The |CSS property method name| should be identical to the |method| 22 | // argument to CSS_PROP and related macros. The method name is 23 | // identical to the name of the property, except that all hyphens are 24 | // removed and CamelCase naming is used. See nsCSSPropList.h for 25 | // further details. 26 | // 27 | // To actually cause use counters to be incremented, DOM methods 28 | // and attributes must have a [UseCounter] extended attribute in 29 | // the Web IDL file. CSS properties require no special treatment 30 | // beyond being listed below. 31 | // 32 | // You might reasonably ask why we have this file and we require 33 | // annotating things with [UseCounter] in the relevant WebIDL file as 34 | // well. Generating things from bindings codegen and ensuring all the 35 | // dependencies were correct would have been rather difficult, and 36 | // annotating the WebIDL files does nothing for identifying CSS 37 | // property usage, which we would also like to track. 
38 | 39 | method SVGSVGElement.getElementById 40 | attribute SVGSVGElement.currentScale 41 | property Fill 42 | -------------------------------------------------------------------------------- /tests/resources/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 2 | 3 | example: 4 | duration: 5 | type: timespan 6 | description: | 7 | The duration of the last foreground session. 8 | time_unit: second 9 | send_in_pings: 10 | - baseline 11 | bugs: 12 | - 1497894, 1519120 13 | data_reviews: 14 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 15 | notification_emails: 16 | - telemetry-client-dev@mozilla.com 17 | expires: '2015-07-11' 18 | 19 | os: 20 | type: string 21 | lifetime: application 22 | send_in_pings: 23 | - baseline 24 | - session_end 25 | description: | 26 | The name of the operating system. 27 | bugs: 28 | - 1497894 29 | data_reviews: 30 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 31 | notification_emails: 32 | - telemetry-client-dev@mozilla.com 33 | expires: never 34 | -------------------------------------------------------------------------------- /tests/resources/nsDeprecatedOperationList.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 | /* This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 | // IWYU pragma: private, include "nsIDocument.h" 7 | 8 | /* 9 | * This file contains the list of deprecated DOM operations. It is 10 | * designed to be used as input to the C preprocessor *only*. 11 | */ 12 | 13 | DEPRECATED_OPERATION(GetAttributeNode) 14 | DEPRECATED_OPERATION(SetAttributeNode) 15 | -------------------------------------------------------------------------------- /tests/resources/test_events.yaml: -------------------------------------------------------------------------------- 1 | # This category contains event entries used for Telemetry tests. 2 | # They will not be sent out with any pings. 3 | telemetry.test: 4 | test: 5 | methods: ["test1", "test2"] 6 | objects: ["object1", "object2"] 7 | bug_numbers: [1286606] 8 | notification_emails: ["telemetry-client-dev@mozilla.com"] 9 | record_in_processes: ['main', 'content'] 10 | description: This is a test entry for Telemetry. 11 | expiry_date: never 12 | extra_keys: 13 | key1: This is just a test description. 14 | key2: This is another test description. 15 | optout: 16 | objects: ["object1", "object2"] 17 | bug_numbers: [1286606] 18 | notification_emails: ["telemetry-client-dev@mozilla.com"] 19 | description: This is an opt-out test entry. 20 | expiry_date: never 21 | release_channel_collection: opt-out 22 | extra_keys: 23 | key1: This is just a test description. 24 | expired_version: 25 | objects: ["object1", "object2"] 26 | bug_numbers: [1286606] 27 | notification_emails: ["telemetry-client-dev@mozilla.com"] 28 | description: This is a test entry with an expired version. 29 | expiry_version: "3.6" 30 | too_long_of_an_event_name: 31 | objects: ["object1", "object2"] 32 | bug_numbers: [1286606] 33 | notification_emails: ["telemetry-client-dev@mozilla.com"] 34 | description: This is an opt-out test entry. 
35 | expiry_date: never 36 | release_channel_collection: opt-out 37 | extra_keys: 38 | key1: This is just a test description. 39 | pause_behavior_change: This is a too-long extra key 40 | 41 | # This is a secondary category used for Telemetry tests. 42 | # The events here will not be sent out with any pings. 43 | telemetry.test.second: 44 | test: 45 | objects: ["object1", "object2", "object3"] 46 | bug_numbers: [1286606] 47 | notification_emails: ["telemetry-client-dev@mozilla.com"] 48 | description: This is a test entry for Telemetry. 49 | expiry_date: never 50 | extra_keys: 51 | key1: This is just a test description. 52 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/duplicate/0/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 2 | 3 | example: 4 | duration: 5 | type: counter 6 | description: | 7 | The duration of the last foreground session. 8 | send_in_pings: 9 | - baseline 10 | bugs: 11 | - 1497894, 1519120 12 | data_reviews: 13 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 14 | notification_emails: 15 | - alice@example.com 16 | expires: '2100-07-11' 17 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/expired/0/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 2 | 3 | example: 4 | duration: 5 | type: timespan 6 | description: | 7 | The duration of the last foreground session. 8 | time_unit: second 9 | send_in_pings: 10 | - baseline 11 | bugs: 12 | - 1497894, 1519120 13 | data_reviews: 14 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 15 | notification_emails: 16 | - bob@example.com 17 | expires: '2019-01-01' 18 | 19 | os: 20 | type: string 21 | lifetime: application 22 | send_in_pings: 23 | - baseline 24 | description: | 25 | Stop 26 | bugs: 27 | - 1497894 28 | data_reviews: 29 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 30 | notification_emails: 31 | - telemetry-client-dev@mozilla.com 32 | expires: never 33 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/improper/0/metrics.yaml: -------------------------------------------------------------------------------- 1 | example: 2 | duration: 3 | type: timespan 4 | time_unit: second 5 | send_in_pings: 6 | - baseline 7 | bugs: 8 | - 1497894, 1519120 9 | data_reviews: 10 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 11 | notification_emails: 12 | - telemetry-client-dev@mozilla.com 13 | expires: never 14 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/normal/0/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 2 | 3 | example: 4 | duration: 5 | type: timespan 6 | description: | 7 | The duration of the last foreground session. 
8 | time_unit: second 9 | send_in_pings: 10 | - baseline 11 | bugs: 12 | - 1497894, 1519120 13 | data_reviews: 14 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 15 | notification_emails: 16 | - bob@example.com 17 | expires: '2100-07-11' 18 | 19 | os: 20 | type: string 21 | lifetime: application 22 | send_in_pings: 23 | - baseline 24 | description: | 25 | Stop 26 | bugs: 27 | - 1497894 28 | data_reviews: 29 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 30 | notification_emails: 31 | - telemetry-client-dev@mozilla.com 32 | expires: never 33 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/normal/1/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 2 | 3 | example: 4 | duration: 5 | type: timespan 6 | description: | 7 | The duration of the last foreground session. 8 | time_unit: second 9 | send_in_pings: 10 | - baseline 11 | bugs: 12 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1497894 13 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1519120 14 | data_reviews: 15 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 16 | notification_emails: 17 | - charlie@example.com 18 | expires: '2100-07-11' 19 | 20 | os: 21 | type: string 22 | lifetime: application 23 | send_in_pings: 24 | - baseline 25 | description: | 26 | don't 27 | bugs: 28 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1497894 29 | data_reviews: 30 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 31 | notification_emails: 32 | - telemetry-client-dev@mozilla.com 33 | expires: never 34 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/normal/2/metrics.yaml: -------------------------------------------------------------------------------- 1 | $schema: moz://mozilla.org/schemas/glean/metrics/2-0-0 2 | 3 | example: 4 | duration: 5 | type: timespan 6 | description: | 7 | The duration of the last foreground session. 8 | time_unit: second 9 | send_in_pings: 10 | - baseline 11 | bugs: 12 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1497894 13 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1519120 14 | data_reviews: 15 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 16 | notification_emails: 17 | - charlie@example.com 18 | expires: '2100-07-11' 19 | 20 | os: 21 | type: string 22 | lifetime: application 23 | send_in_pings: 24 | - baseline 25 | description: | 26 | pop 27 | metadata: 28 | tags: 29 | - foo 30 | bugs: 31 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1497894 32 | data_reviews: 33 | - https://bugzilla.mozilla.org/show_bug.cgi?id=1512938#c3 34 | notification_emails: 35 | - telemetry-client-dev@mozilla.com 36 | expires: never 37 | 38 | -------------------------------------------------------------------------------- /tests/resources/test_repo_files/normal/2/tags.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: moz://mozilla.org/schemas/glean/tags/1-0-0 3 | 4 | foo: 5 | description: "the foo tag" 6 | -------------------------------------------------------------------------------- /tests/resources/test_scalars.yaml: -------------------------------------------------------------------------------- 1 | # This file contains a definition of the scalar probes that are recorded in Telemetry. 2 | # They are submitted with the "main" pings and can be inspected in about:telemetry. 
3 | 4 | # The following section is for probes testing the Telemetry system. They will not be 5 | # submitted in pings and are only used for testing. 6 | telemetry.test: 7 | unsigned_int_kind: 8 | bug_numbers: 9 | - 1276190 10 | description: > 11 | This is a test uint type with a really long description, maybe spanning even multiple 12 | lines, to just prove a point: everything works just fine. 13 | expires: never 14 | kind: uint 15 | notification_emails: 16 | - telemetry-client-dev@mozilla.com 17 | 18 | string_kind: 19 | bug_numbers: 20 | - 1276190 21 | description: A string test type with a one line comment that works just fine! 22 | expires: never 23 | kind: string 24 | notification_emails: 25 | - telemetry-client-dev@mozilla.com 26 | 27 | boolean_kind: 28 | bug_numbers: 29 | - 1281214 30 | description: A boolean test type with a one line comment that works just fine! 31 | expires: never 32 | kind: boolean 33 | notification_emails: 34 | - telemetry-client-dev@mozilla.com 35 | 36 | expired: 37 | bug_numbers: 38 | - 1276190 39 | description: This is an expired testing scalar; not meant to be touched. 40 | expires: 4.0a1 41 | kind: uint 42 | notification_emails: 43 | - telemetry-client-dev@mozilla.com 44 | 45 | unexpired: 46 | bug_numbers: 47 | - 1276190 48 | description: This is an unexpired testing scalar; not meant to be touched. 49 | expires: "375.0" 50 | kind: uint 51 | notification_emails: 52 | - telemetry-client-dev@mozilla.com 53 | 54 | release_optin: 55 | bug_numbers: 56 | - 1276190 57 | description: A testing scalar; not meant to be touched. 58 | expires: never 59 | kind: uint 60 | notification_emails: 61 | - telemetry-client-dev@mozilla.com 62 | release_channel_collection: opt-in 63 | 64 | release_optout: 65 | bug_numbers: 66 | - 1276190 67 | description: A testing scalar; not meant to be touched. 68 | expires: never 69 | kind: uint 70 | notification_emails: 71 | - telemetry-client-dev@mozilla.com 72 | release_channel_collection: opt-out 73 | 74 | keyed_release_optin: 75 | bug_numbers: 76 | - 1277806 77 | description: A testing scalar; not meant to be touched. 78 | expires: never 79 | kind: uint 80 | keyed: true 81 | notification_emails: 82 | - telemetry-client-dev@mozilla.com 83 | release_channel_collection: opt-in 84 | 85 | keyed_release_optout: 86 | bug_numbers: 87 | - 1277806 88 | description: A testing scalar; not meant to be touched. 89 | expires: never 90 | kind: uint 91 | keyed: true 92 | notification_emails: 93 | - telemetry-client-dev@mozilla.com 94 | release_channel_collection: opt-out 95 | 96 | keyed_expired: 97 | bug_numbers: 98 | - 1277806 99 | description: This is an expired testing scalar; not meant to be touched. 100 | expires: 4.0a1 101 | kind: uint 102 | keyed: true 103 | notification_emails: 104 | - telemetry-client-dev@mozilla.com 105 | 106 | keyed_unsigned_int: 107 | bug_numbers: 108 | - 1277806 109 | description: A testing keyed uint scalar; not meant to be touched. 110 | expires: never 111 | kind: uint 112 | keyed: true 113 | notification_emails: 114 | - telemetry-client-dev@mozilla.com 115 | 116 | keyed_boolean_kind: 117 | bug_numbers: 118 | - 1277806 119 | description: A testing keyed boolean scalar; not meant to be touched. 120 | expires: never 121 | kind: boolean 122 | keyed: true 123 | notification_emails: 124 | - telemetry-client-dev@mozilla.com 125 | record_in_processes: 126 | - 'main' 127 | - 'content' 128 | 129 | content_only_uint: 130 | bug_numbers: 131 | - 1278556 132 | description: A testing uint scalar; not meant to be touched. 
133 | expires: never 134 | kind: uint 135 | notification_emails: 136 | - telemetry-client-dev@mozilla.com 137 | record_in_processes: 138 | - 'content' 139 | 140 | all_processes_uint: 141 | bug_numbers: 142 | - 1278556 143 | description: A testing uint scalar; not meant to be touched. 144 | expires: never 145 | kind: uint 146 | notification_emails: 147 | - telemetry-client-dev@mozilla.com 148 | record_in_processes: 149 | - 'all' 150 | 151 | all_child_processes_string: 152 | bug_numbers: 153 | - 1278556 154 | description: A testing string scalar; not meant to be touched. 155 | expires: never 156 | kind: string 157 | notification_emails: 158 | - telemetry-client-dev@mozilla.com 159 | record_in_processes: 160 | - 'all_childs' 161 | 162 | other.test: 163 | test_probe: 164 | bug_numbers: 165 | - 1276190 166 | description: > 167 | This is a test uint type with a really long description, maybe spanning even multiple 168 | lines, to just prove a point: everything works just fine. 169 | expires: never 170 | kind: uint 171 | cpp_guard: 'XP_WIN' 172 | notification_emails: 173 | - telemetry-client-dev@mozilla.com 174 | 175 | multistore_probe: 176 | bug_numbers: 177 | - 1276190 178 | description: > 179 | This is a test uint type with a really long description, maybe spanning even multiple 180 | lines, to just prove a point: everything works just fine. 181 | expires: never 182 | kind: uint 183 | notification_emails: 184 | - telemetry-client-dev@mozilla.com 185 | record_into_store: 186 | - main 187 | - store2 188 | -------------------------------------------------------------------------------- /tests/test_buildhub.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | 5 | from probe_scraper.scrapers.buildhub import Buildhub, NoDataFoundException 6 | 7 | FX_RELEASE_62_0_3 = { 8 | "revision": "c9ed11ae5c79df3dcb69075e1c9da0317d1ecb1b", 9 | "date": datetime(2018, 10, 1, 18, 40, 35), 10 | "version": "62.0.3rc1", 11 | "tree": "releases/mozilla-release", 12 | } 13 | 14 | VERBOSE = True 15 | 16 | 17 | @pytest.fixture 18 | def records(): 19 | return [ 20 | { 21 | "_source": { 22 | "download": {"date": "2019-01-28T23:49:22.717388+00:00"}, 23 | "source": {"revision": "abc", "tree": "releases/mozilla-release"}, 24 | "target": {"version": "1"}, 25 | } 26 | }, 27 | { 28 | "_source": { 29 | "download": {"date": "2019-01-29T23:49:22Z"}, 30 | "source": {"revision": "def", "tree": "releases/mozilla-release"}, 31 | "target": {"version": "2"}, 32 | } 33 | }, 34 | ] 35 | 36 | 37 | @pytest.mark.web_dependency 38 | def test_nightly_count(): 39 | channel, min_version, max_version = "nightly", 62, 62 40 | 41 | bh = Buildhub() 42 | releases = bh.get_revision_dates( 43 | channel, min_version, max_version=max_version, verbose=VERBOSE 44 | ) 45 | assert len(releases) == 97 46 | 47 | 48 | @pytest.mark.web_dependency 49 | def test_pagination(): 50 | channel, min_version, max_version = "nightly", 62, 62 51 | 52 | bh = Buildhub() 53 | releases = bh.get_revision_dates( 54 | channel, min_version, max_version=max_version, verbose=VERBOSE, window=10 55 | ) 56 | assert len(releases) == 97 57 | 58 | 59 | @pytest.mark.web_dependency 60 | def test_duplicate_revisions(): 61 | channel, min_version, max_version = "nightly", 67, 67 62 | 63 | bh = Buildhub() 64 | releases = bh.get_revision_dates( 65 | channel, min_version, max_version=max_version, verbose=VERBOSE 66 | ) 67 | assert len({r["revision"] for r in releases}) == len(releases) 68 | 69 | 70 | 
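# The web_dependency marker used on the tests below is declared in
# pytest.ini; a typical way to skip the network-bound tests locally
# (the command line is an illustration, not from this repo's docs):
#
#     pytest -m "not web_dependency" tests/test_buildhub.py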
@pytest.mark.web_dependency 71 | def test_release(): 72 | channel, min_version, max_version = "release", 62, 62 73 | 74 | bh = Buildhub() 75 | releases = bh.get_revision_dates( 76 | channel, min_version, max_version=max_version, verbose=VERBOSE 77 | ) 78 | 79 | assert FX_RELEASE_62_0_3 in releases 80 | 81 | 82 | @pytest.mark.web_dependency 83 | def test_min_release(): 84 | channel, min_version, max_version = "release", 63, 63 85 | 86 | bh = Buildhub() 87 | releases = bh.get_revision_dates( 88 | channel, min_version, max_version=max_version, verbose=VERBOSE 89 | ) 90 | 91 | assert FX_RELEASE_62_0_3 not in releases 92 | 93 | 94 | @pytest.mark.web_dependency 95 | def test_no_min_max_version_overlap(): 96 | channel, min_version, max_version = "release", 63, 62 97 | bh = Buildhub() 98 | 99 | with pytest.raises(NoDataFoundException): 100 | bh.get_revision_dates( 101 | channel, min_version, max_version=max_version, verbose=VERBOSE 102 | ) 103 | 104 | 105 | @pytest.mark.web_dependency 106 | def test_no_released_version(): 107 | channel, min_version = "release", 199 108 | bh = Buildhub() 109 | 110 | with pytest.raises(NoDataFoundException): 111 | bh.get_revision_dates(channel, min_version, verbose=VERBOSE) 112 | 113 | 114 | def test_version_200(): 115 | channel, min_version = "release", 200 116 | bh = Buildhub() 117 | 118 | with pytest.raises(AssertionError): 119 | bh.get_revision_dates(channel, min_version, verbose=VERBOSE) 120 | 121 | 122 | def test_cleaned_dates(records): 123 | bh = Buildhub() 124 | 125 | expected = [ 126 | { 127 | "revision": "abc", 128 | "date": datetime(2019, 1, 28, 23, 49, 22, 717388), 129 | "version": "1", 130 | "tree": "releases/mozilla-release", 131 | }, 132 | { 133 | "revision": "def", 134 | "date": datetime(2019, 1, 29, 23, 49, 22), 135 | "version": "2", 136 | "tree": "releases/mozilla-release", 137 | }, 138 | ] 139 | 140 | assert bh._distinct_and_clean(records) == expected 141 | 142 | 143 | # Test unique and sorted values 144 | def test_unique_sorted(records): 145 | bh = Buildhub() 146 | 147 | records[1]["_source"]["source"]["revision"] = "abc" 148 | records[1]["_source"]["download"]["date"] = "2019-01-22T23:49:22Z" 149 | 150 | expected = [ 151 | { 152 | "revision": "abc", 153 | "date": datetime(2019, 1, 22, 23, 49, 22), 154 | "version": "2", 155 | "tree": "releases/mozilla-release", 156 | }, 157 | ] 158 | 159 | assert bh._distinct_and_clean(records) == expected 160 | -------------------------------------------------------------------------------- /tests/test_event_parser.py: -------------------------------------------------------------------------------- 1 | from probe_scraper.parsers.events import EventsParser 2 | 3 | 4 | def is_string(s): 5 | return isinstance(s, str) 6 | 7 | 8 | def test_event_parser(): 9 | # Parse the events from the test definitions. 10 | parser = EventsParser() 11 | parsed_events = parser.parse(["tests/resources/test_events.yaml"], "55") 12 | 13 | # Make sure we loaded all the events. 14 | assert len(parsed_events) == 5 15 | 16 | # Make sure each of them contains all the required fields and details. 17 | REQUIRED_FIELDS = [ 18 | "cpp_guard", 19 | "description", 20 | "details", 21 | "expiry_version", 22 | "optout", 23 | "bug_numbers", 24 | ] 25 | REQUIRED_DETAILS = ["methods", "objects", "extra_keys", "record_in_processes"] 26 | 27 | for name, data in parsed_events.items(): 28 | assert is_string(name) 29 | 30 | # Make sure we have all the required fields and details. 
31 | for field in REQUIRED_FIELDS: 32 | assert field in data 33 | 34 | for field in REQUIRED_DETAILS: 35 | assert field in data["details"] 36 | 37 | 38 | def parse(channel, version): 39 | parser = EventsParser() 40 | return parser.parse(["tests/resources/test_events.yaml"], version, channel) 41 | 42 | 43 | def test_channel_version_ignore(): 44 | assert parse("release", 52) == {} 45 | assert parse("release", 53) != {} 46 | 47 | assert parse("beta", 52) == {} 48 | assert parse("beta", 53) != {} 49 | 50 | assert parse("nightly", 52) == {} 51 | assert parse("nightly", 53) == {} 52 | assert parse("nightly", 54) != {} 53 | -------------------------------------------------------------------------------- /tests/test_fog_checks.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | from typing import Dict, List 4 | 5 | import pytest 6 | 7 | from probe_scraper import fog_checks, transform_probes 8 | from probe_scraper.parsers.repositories import Repository 9 | from probe_scraper.scrapers.git_scraper import Commit 10 | 11 | FAKE_METRIC = { 12 | "type": "string", 13 | "expires": "never", 14 | "notification_emails": ["bar@foo.com"], 15 | "bugs": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1701769"], 16 | } 17 | 18 | FAKE_REPO_META = { 19 | "notification_emails": ["foo@bar.com"], 20 | } 21 | 22 | 23 | @pytest.fixture 24 | def fake_latest_nightly_version() -> str: 25 | return "100" 26 | 27 | 28 | @pytest.fixture 29 | def fake_metrics(fake_latest_nightly_version) -> Dict[str, Dict]: 30 | return { 31 | "category.name.metric_name": FAKE_METRIC, 32 | "expired.category.name.metric_name": {**FAKE_METRIC, "expires": "expired"}, 33 | } 34 | 35 | 36 | @pytest.fixture 37 | def fake_commit_timestamp() -> int: 38 | return int(datetime.now().timestamp()) 39 | 40 | 41 | @pytest.fixture 42 | def fake_metrics_by_commit( 43 | fake_commit_timestamp, fake_metrics 44 | ) -> Dict[str, Dict[str, Dict]]: 45 | return { 46 | Commit( 47 | hash="deadcode", 48 | timestamp=fake_commit_timestamp, 49 | reflog_index=0, 50 | is_head=True, 51 | ): { 52 | **fake_metrics, 53 | "newer.category.name.metric_name": FAKE_METRIC, 54 | }, 55 | Commit( 56 | hash="decafcaf", 57 | timestamp=fake_commit_timestamp, 58 | reflog_index=1, 59 | is_head=False, 60 | ): fake_metrics, 61 | } 62 | 63 | 64 | @pytest.fixture 65 | def fake_commits(fake_commit_timestamp) -> Dict[Commit, List[Path]]: 66 | # `decafcaf` should remain the most recent SHA. 
67 | return { 68 | Commit( 69 | hash="decafcaf", 70 | timestamp=fake_commit_timestamp, 71 | reflog_index=1, 72 | is_head=False, 73 | ): [], 74 | Commit( 75 | hash="deadcode", 76 | timestamp=fake_commit_timestamp, 77 | reflog_index=0, 78 | is_head=True, 79 | ): [], 80 | } 81 | 82 | 83 | @pytest.fixture 84 | def fake_metrics_by_repo_by_commit( 85 | fake_metrics_by_commit, fake_repos 86 | ) -> Dict[str, Dict[Commit, Dict[str, Dict]]]: 87 | return { 88 | repo.name: { 89 | commit: { 90 | f"{metric_name}_{repo.name}": metric 91 | for metric_name, metric in metrics.items() 92 | } 93 | for commit, metrics in fake_metrics_by_commit.items() 94 | } 95 | for repo in fake_repos 96 | } 97 | 98 | 99 | @pytest.fixture 100 | def fake_metrics_by_repo( 101 | fake_metrics_by_repo_by_commit, 102 | ) -> Dict[str, Dict[str, Dict[str, Dict]]]: 103 | return transform_probes.transform_metrics_by_hash(fake_metrics_by_repo_by_commit) 104 | 105 | 106 | @pytest.fixture 107 | def fake_repos() -> List[Repository]: 108 | return [ 109 | Repository("glean-core", dict(FAKE_REPO_META, library_names=["glean-core"])), 110 | Repository("firefox-desktop", dict(FAKE_REPO_META, dependencies=["gecko"])), 111 | Repository("gecko", dict(FAKE_REPO_META, dependencies=["glean-core"])), 112 | ] 113 | 114 | 115 | @pytest.fixture 116 | def fake_commits_by_repo( 117 | fake_repos, fake_commits 118 | ) -> Dict[str, Dict[Commit, List[Path]]]: 119 | return {repo.name: fake_commits for repo in fake_repos} 120 | 121 | 122 | def test_get_current_metrics(fake_metrics_by_repo): 123 | current_metrics_by_repo = fog_checks.get_current_metrics_by_repo( 124 | fake_metrics_by_repo 125 | ) 126 | assert ( 127 | "newer.category.name.metric_name_glean-core" 128 | in current_metrics_by_repo["glean-core"] 129 | ) 130 | 131 | 132 | def test_get_expiring_metrics(fake_metrics, fake_latest_nightly_version): 133 | expiring_metrics = fog_checks.get_expiring_metrics( 134 | { 135 | **fake_metrics, 136 | "expiring.metric_name": { 137 | **FAKE_METRIC, 138 | "expires": str(int(fake_latest_nightly_version) + 1), 139 | }, 140 | }, 141 | fake_latest_nightly_version, 142 | ) 143 | assert "expired.category.name.metric_name" in expiring_metrics 144 | assert "expiring.metric_name" in expiring_metrics 145 | assert "category.name.metric_name" not in expiring_metrics 146 | 147 | 148 | def test_fbagefem_does_nothing_with_no_fog_repos(fake_metrics_by_repo, fake_repos): 149 | fake_repos = [repo for repo in fake_repos if repo.name not in fog_checks.FOG_REPOS] 150 | fake_metrics_by_repo = { 151 | repo_name: metrics 152 | for repo_name, metrics in fake_metrics_by_repo.items() 153 | if repo_name not in fog_checks.FOG_REPOS 154 | } 155 | expiry_emails = fog_checks.file_bugs_and_get_emails_for_expiring_metrics( 156 | fake_repos, fake_metrics_by_repo, None, True 157 | ) 158 | assert expiry_emails is None 159 | 160 | 161 | @pytest.mark.web_dependency # fbagefem gets the latest nightly version from product-info 162 | def test_fbagefem_returns_emails_for_expiring_metrics(fake_metrics_by_repo, fake_repos): 163 | expiry_emails = fog_checks.file_bugs_and_get_emails_for_expiring_metrics( 164 | fake_repos, 165 | fake_metrics_by_repo, 166 | None, 167 | True, 168 | ) 169 | for fog_repo in fog_checks.FOG_REPOS: 170 | assert f"expired_metrics_{fog_repo}" in expiry_emails 171 | assert len(expiry_emails[f"expired_metrics_{fog_repo}"]["emails"]) == 1 172 | 173 | 174 | def test_bug_number_from_url(): 175 | assert ( 176 | fog_checks.bug_number_from_url( 177 | 
"https://bugzilla.mozilla.org/show_bug.cgi?id=1701769" 178 | ) 179 | == 1701769 180 | ) 181 | assert ( 182 | fog_checks.bug_number_from_url("https://bugzilla.mozilla.org/1885138") 183 | == 1885138 184 | ) 185 | assert ( 186 | fog_checks.bug_number_from_url( 187 | "https://bugzilla.mozilla.org/show_bug.cgi?id=1701769#c1" 188 | ) 189 | == 1701769 190 | ) 191 | assert fog_checks.bug_number_from_url("https://bugzil.la/1701769") == 1701769 192 | # Parser shouldn't give a good number for github urls 193 | assert ( 194 | fog_checks.bug_number_from_url( 195 | "https://github.com/mozilla/probe-scraper/pull/382" 196 | ) 197 | is None 198 | ) 199 | -------------------------------------------------------------------------------- /tests/test_glean_checks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from probe_scraper.glean_checks import check_for_duplicate_metrics 4 | from probe_scraper.parsers.repositories import Repository 5 | 6 | OLD_DATE = "2019-10-04 17:30:42" 7 | NEWER_DATE = "2019-12-04 17:30:42" 8 | NEWEST_DATE = "2020-12-04 17:30:42" 9 | 10 | BASE_METADATA = {"notification_emails": ["foo@bar.com"]} 11 | 12 | 13 | @pytest.fixture 14 | def fake_repositories(): 15 | return [ 16 | Repository("glean-core", dict(BASE_METADATA, library_names=["glean-core"])), 17 | Repository( 18 | "glean-android", dict(BASE_METADATA, library_names=["glean-android"]) 19 | ), 20 | Repository( 21 | "fake-app", 22 | dict(BASE_METADATA, dependencies=["glean-core", "glean-android"]), 23 | ), 24 | ] 25 | 26 | 27 | def test_check_duplicate_metrics_no_duplicates(fake_repositories): 28 | # no overlap between metrics defined by glean-core and glean-android (both used by burnham) 29 | # check_for_duplicate_metrics should return False 30 | assert not check_for_duplicate_metrics( 31 | fake_repositories, 32 | { 33 | "glean-core": { 34 | "app_display_version": { 35 | "history": [ 36 | dict( 37 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 38 | ) 39 | ] 40 | } 41 | }, 42 | "glean-android": { 43 | "app_display_version_android": { 44 | "history": [ 45 | dict( 46 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 47 | ) 48 | ] 49 | } 50 | }, 51 | "fake-app": {}, 52 | }, 53 | {}, 54 | ) 55 | 56 | 57 | def test_check_duplicate_metrics_duplicates(fake_repositories): 58 | # glean-core and glean-android define the same metric in the current date 59 | # check_for_duplicate_metrics should return True 60 | assert check_for_duplicate_metrics( 61 | fake_repositories, 62 | { 63 | "glean-core": { 64 | "app_display_version": { 65 | "history": [ 66 | dict( 67 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 68 | ) 69 | ] 70 | } 71 | }, 72 | "glean-android": { 73 | "app_display_version": { 74 | "history": [ 75 | dict( 76 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 77 | ) 78 | ] 79 | }, 80 | }, 81 | "fake-app": {}, 82 | }, 83 | {}, 84 | ) 85 | 86 | 87 | def test_check_duplicate_metrics_duplicates_in_the_past(fake_repositories): 88 | # glean-core and glean-android define the same metric at one point in the 89 | # past, but not presently 90 | # check_for_duplicate_metrics should return False 91 | assert not check_for_duplicate_metrics( 92 | fake_repositories, 93 | { 94 | "glean-core": { 95 | "app_display_version": { 96 | "history": [ 97 | dict( 98 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 99 | ) 100 | ] 101 | } 102 | }, 103 | "glean-android": { 104 | "app_display_version": { 105 | "history": [ 106 | dict( 
107 | BASE_METADATA, dates={"first": OLD_DATE, "last": NEWER_DATE} 108 | ), 109 | ] 110 | }, 111 | "new_metric": { 112 | "history": [ 113 | # the newer date here implies that app_display_version above was removed 114 | dict( 115 | BASE_METADATA, 116 | dates={"first": OLD_DATE, "last": NEWEST_DATE}, 117 | ), 118 | ] 119 | }, 120 | }, 121 | "fake-app": {}, 122 | }, 123 | {}, 124 | ) 125 | -------------------------------------------------------------------------------- /tests/test_glean_limit_date.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | 6 | import json 7 | import os 8 | from datetime import datetime, time, timedelta 9 | from pathlib import Path 10 | from uuid import uuid4 11 | 12 | import git 13 | import pytest 14 | import yaml 15 | from git import Head, Repo 16 | 17 | import probe_scraper.runner 18 | 19 | 20 | @pytest.fixture 21 | def test_dir(tmp_path_factory) -> Path: 22 | # Where we will build the test git repo 23 | return tmp_path_factory.mktemp("test_git_repositories") 24 | 25 | 26 | def generate_repo( 27 | test_dir: Path, 28 | repo_name: str, 29 | branch: str = "main", 30 | skip_commits: int = 0, 31 | num_commits: int = 1, 32 | base_dir: Path = Path("tests/resources/test_repo_files"), 33 | base_datetime: datetime = datetime.utcnow(), 34 | ) -> Path: 35 | directory = test_dir / f"{repo_name}-{uuid4().hex}" 36 | repo = Repo.init(directory) 37 | # Ensure the default branch is using a fixed name. 38 | # User config could change that, 39 | # breaking tests with implicit assumptions further down the line. 40 | repo.head.reference = Head(repo, f"refs/heads/{branch}") 41 | 42 | base_path = base_dir / repo_name 43 | for i in range(skip_commits, skip_commits + num_commits): 44 | files_dir = base_path / str(i) 45 | if not files_dir.exists(): 46 | break 47 | 48 | for path in files_dir.iterdir(): 49 | print(f"Copying file {path.name}") 50 | destination = directory / path.name 51 | destination.write_bytes(path.read_bytes()) 52 | 53 | repo.index.add("*") 54 | # We need to synthesize the timestamps of commits to each be a second 55 | # apart, otherwise the commits may be at exactly the same second, which 56 | # means they won't always sort in order, and thus the merging of identical 57 | # metrics in adjacent commits may not happen correctly. 
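        # For example (values illustrative), with base_datetime at midnight the
        # format below stamps commit 0 at 00:00:00 and commit 1 at 00:00:01:
        #
        #   f"{datetime(2024, 1, 1) + timedelta(seconds=1):%Y-%m-%dT%H:%M:%S}"
        #   -> "2024-01-01T00:00:01"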
58 | commit_date = f"{base_datetime + timedelta(seconds=i):%Y-%m-%dT%H:%M:%S}" 59 | repo.index.commit(f"Commit {i}", commit_date=commit_date) 60 | 61 | return directory 62 | 63 | 64 | def test_single_commit(test_dir: Path): 65 | today_date = datetime.utcnow().date() 66 | today_datetime = datetime.combine(today_date, time.min) 67 | repo_path = generate_repo( 68 | test_dir, 69 | "normal", 70 | num_commits=2, 71 | # each commit after the first adds 1 second to base_datetime, so setting 72 | # glean_limit_date=today_date and base_datetime to 1 second before that will 73 | # only collect the second commit 74 | base_datetime=today_datetime - timedelta(seconds=1), 75 | ) 76 | 77 | repositories_info = { 78 | "version": "2", 79 | "libraries": [], 80 | "applications": [ 81 | { 82 | "app_name": "example", 83 | "canonical_app_name": "Example", 84 | "app_description": "foo", 85 | "url": str(repo_path), 86 | "notification_emails": ["nobody@example.com"], 87 | "metrics_files": ["metrics.yaml"], 88 | "channels": [ 89 | { 90 | "v1_name": "example", 91 | "app_id": "example", 92 | "app_channel": "release", 93 | } 94 | ], 95 | } 96 | ], 97 | } 98 | repositories_file = test_dir / "repositories.yaml" 99 | repositories_file.write_text(yaml.dump(repositories_info)) 100 | 101 | # generate output with date limit 102 | actual_kwargs = dict( 103 | cache_dir=test_dir / "cache", 104 | out_dir=test_dir / "actual", 105 | firefox_version=None, 106 | min_firefox_version=None, 107 | process_moz_central_probes=False, 108 | process_glean_metrics=True, 109 | repositories_file=repositories_file, 110 | dry_run=True, 111 | glean_repos=None, 112 | firefox_channel=None, 113 | output_bucket="", 114 | cache_bucket=None, 115 | env="dev", 116 | bugzilla_api_key=None, 117 | glean_urls=[str(repo_path)], 118 | glean_commit=None, 119 | glean_commit_branch=None, 120 | email_file=test_dir / "emails.txt", 121 | update=True, 122 | glean_limit_date=today_date, 123 | ) 124 | probe_scraper.runner.main(**actual_kwargs) 125 | 126 | # shallow clone repo with single commit to generate expected output 127 | original_repo_path = repo_path.parent / f"{repo_path.name}-original" 128 | os.rename(repo_path, original_repo_path) 129 | # must use file:// or git will ignore --depth 130 | git.Repo.clone_from(f"file://{original_repo_path.absolute()}", repo_path, depth=1) 131 | expect_kwargs = { 132 | **actual_kwargs, 133 | "update": False, 134 | "out_dir": test_dir / "expect", 135 | "glean_limit_date": None, 136 | } 137 | probe_scraper.runner.main(**expect_kwargs) 138 | # validate 139 | expect_metrics = json.loads( 140 | (test_dir / "expect" / "glean" / "example" / "metrics").read_text() 141 | ) 142 | actual_metrics = json.loads( 143 | (test_dir / "actual" / "glean" / "example" / "metrics").read_text() 144 | ) 145 | assert expect_metrics == actual_metrics 146 | 147 | 148 | def test_add_commit(test_dir: Path): 149 | today_date = datetime.utcnow().date() 150 | today_datetime = datetime.combine(today_date, time.min) 151 | repo_path = generate_repo( 152 | test_dir, 153 | "normal", 154 | num_commits=2, 155 | # each commit after the first adds 1 second to base_datetime, so setting 156 | # glean_limit_date=today_date and base_datetime to 1 second before that will 157 | # only collect the second commit 158 | base_datetime=today_datetime - timedelta(seconds=1), 159 | ) 160 | 161 | repositories_info = { 162 | "version": "2", 163 | "libraries": [], 164 | "applications": [ 165 | { 166 | "app_name": "example", 167 | "canonical_app_name": "Example", 168 | "app_description": 
"foo", 169 | "url": str(repo_path), 170 | "notification_emails": ["nobody@example.com"], 171 | "metrics_files": ["metrics.yaml"], 172 | "channels": [ 173 | { 174 | "v1_name": "example", 175 | "app_id": "example", 176 | "app_channel": "release", 177 | } 178 | ], 179 | } 180 | ], 181 | } 182 | repositories_file = test_dir / "repositories.yaml" 183 | repositories_file.write_text(yaml.dump(repositories_info)) 184 | 185 | # generate expected output without date limit 186 | expect_kwargs = dict( 187 | cache_dir=test_dir / "cache", 188 | out_dir=test_dir / "expect", 189 | firefox_version=None, 190 | min_firefox_version=None, 191 | process_moz_central_probes=False, 192 | process_glean_metrics=True, 193 | repositories_file=repositories_file, 194 | dry_run=True, 195 | glean_repos=None, 196 | firefox_channel=None, 197 | output_bucket="", 198 | cache_bucket=None, 199 | env="dev", 200 | bugzilla_api_key=None, 201 | glean_urls=[str(repo_path)], 202 | glean_commit=None, 203 | glean_commit_branch=None, 204 | email_file=test_dir / "emails.txt", 205 | update=False, 206 | glean_limit_date=None, 207 | ) 208 | probe_scraper.runner.main(**expect_kwargs) 209 | 210 | # clone repo with only first commit to initialize state before updating 211 | actual_kwargs = {**expect_kwargs, "out_dir": test_dir / "actual"} 212 | original_repo_path = repo_path.parent / f"{repo_path.name}-original" 213 | os.rename(repo_path, original_repo_path) 214 | repo = git.Repo.clone_from(original_repo_path, repo_path) 215 | repo.git.reset("HEAD~", hard=True) 216 | probe_scraper.runner.main(**actual_kwargs) 217 | # validate files are initially different 218 | expect_metrics = json.loads( 219 | (test_dir / "expect" / "glean" / "example" / "metrics").read_text() 220 | ) 221 | actual_metrics = json.loads( 222 | (test_dir / "actual" / "glean" / "example" / "metrics").read_text() 223 | ) 224 | assert expect_metrics != actual_metrics 225 | 226 | # update with second commit and date limit 227 | repo.git.pull() 228 | actual_kwargs["update"] = True 229 | actual_kwargs["glean_limit_date"] = today_date 230 | probe_scraper.runner.main(**actual_kwargs) 231 | # validate files are now equivalent 232 | expect_metrics = json.loads( 233 | (test_dir / "expect" / "glean" / "example" / "metrics").read_text() 234 | ) 235 | for metric in expect_metrics: 236 | for element in expect_metrics[metric]["history"]: 237 | for index in ("first", "last"): 238 | # reflog index is expected to be inaccurate in update mode 239 | element["reflog-index"][index] = 0 240 | actual_metrics = json.loads( 241 | (test_dir / "actual" / "glean" / "example" / "metrics").read_text() 242 | ) 243 | assert expect_metrics == actual_metrics 244 | -------------------------------------------------------------------------------- /tests/test_glean_push.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | 5 | 6 | import datetime 7 | import os 8 | import time 9 | import unittest.mock 10 | from contextlib import contextmanager 11 | from pathlib import Path 12 | from unittest.mock import Mock 13 | 14 | import pytest 15 | import yaml 16 | from git import Head, Repo 17 | 18 | from probe_scraper import glean_push 19 | 20 | 21 | @contextmanager 22 | def pushd(path: Path): 23 | cwd = os.getcwd() 24 | try: 25 | os.chdir(path) 26 | yield 27 | finally: 28 | os.chdir(cwd) 29 | 30 | 31 | @pytest.fixture(autouse=True) 32 | def empty_output_bucket(): 33 | with unittest.mock.patch.dict(os.environ, {"OUTPUT_BUCKET": ""}): 34 | yield 35 | 36 | 37 | @pytest.fixture 38 | def test_dir(tmp_path_factory) -> Path: 39 | # Where we will build the test git repo 40 | return tmp_path_factory.mktemp("test_git_repositories") 41 | 42 | 43 | @pytest.fixture 44 | def repositories_file(test_dir: Path) -> Path: 45 | # Where we will write the repositories file 46 | return test_dir / "repositories.yaml" 47 | 48 | 49 | def generate_repo( 50 | test_dir: Path, 51 | repo_name: str, 52 | branch: str = "main", 53 | num_commits: int = 1, 54 | base_dir: Path = Path("tests/resources/test_repo_files"), 55 | ) -> Path: 56 | directory = test_dir / repo_name 57 | repo = Repo.init(directory) 58 | # Ensure the default branch is using a fixed name. 59 | # User config could change that, 60 | # breaking tests with implicit assumptions further down the line. 61 | repo.head.reference = Head(repo, f"refs/heads/{branch}") 62 | 63 | # We need to synthesize the time stamps of commits to each be a second 64 | # apart, otherwise the commits may be at exactly the same second, which 65 | # means they won't always sort in order, and thus the merging of identical 66 | # metrics in adjacent commits may not happen correctly. 
67 | base_time = time.time() 68 | 69 | base_path = base_dir / repo_name 70 | for i in range(num_commits): 71 | files_dir = base_path / str(i) 72 | if not files_dir.exists(): 73 | break 74 | 75 | for path in files_dir.iterdir(): 76 | print(f"Copying file {path.name}") 77 | destination = directory / path.name 78 | destination.write_bytes(path.read_bytes()) 79 | 80 | repo.index.add("*") 81 | commit_date = datetime.datetime.fromtimestamp(base_time + i).isoformat() 82 | # split() is safe when there is no fractional part, unlike slicing on 83 | # find("."), which returns -1 and would chop the last character. 84 | commit_date = commit_date.split(".")[0] 85 | repo.index.commit(f"Commit {i}", commit_date=commit_date) 86 | 87 | return directory 88 | 89 | 90 | def test_missing_metrics_file(test_dir: Path, repositories_file: Path): 91 | repo_path = generate_repo(test_dir, "normal") 92 | commit = Repo(repo_path).head.commit.hexsha 93 | data = {"url": str(repo_path), "commit": commit, "branch": ""} 94 | request = Mock(get_json=Mock(return_value=data)) 95 | 96 | repositories_info = { 97 | "version": "2", 98 | "libraries": [], 99 | "applications": [ 100 | { 101 | "app_name": "example", 102 | "canonical_app_name": "Example", 103 | "app_description": "foo", 104 | "url": str(repo_path), 105 | "notification_emails": ["nobody@example.com"], 106 | "metrics_files": ["missing/metrics.yaml"], 107 | "channels": [ 108 | { 109 | "v1_name": "example", 110 | "app_id": "app-id", 111 | "app_channel": "release", 112 | } 113 | ], 114 | } 115 | ], 116 | } 117 | repositories_file.write_text(yaml.dump(repositories_info)) 118 | with pushd(repositories_file.parent): 119 | response = glean_push.main(request) 120 | assert response.status_code == 400 121 | assert ( 122 | response.data.decode() 123 | == f"Error: missing/metrics.yaml not found in commit {commit} for app-id\n" 124 | ) 125 | 126 | repositories_info["applications"][0]["deprecated"] = True 127 | repositories_file.write_text(yaml.dump(repositories_info)) 128 | with pushd(repositories_file.parent): 129 | response = glean_push.main(request) 130 | assert response.status_code == 200 131 | assert response.data.decode() == "update is valid, but not published\n" 132 | -------------------------------------------------------------------------------- /tests/test_histogram_parser.py: -------------------------------------------------------------------------------- 1 | from probe_scraper.parsers.histograms import HistogramsParser 2 | 3 | 4 | def is_string(s): 5 | return isinstance(s, str) 6 | 7 | 8 | def histogram_parser(version, usecounter_optout): 9 | FILES = [ 10 | "tests/resources/Histograms.json", 11 | "tests/resources/nsDeprecatedOperationList.h", 12 | "tests/resources/UseCounters.conf", 13 | ] 14 | 15 | HISTOGRAMS = [ 16 | "TELEMETRY_TEST_FLAG", 17 | "TELEMETRY_TEST_COUNT", 18 | "TELEMETRY_TEST_COUNT2", 19 | "TELEMETRY_TEST_COUNT_INIT_NO_RECORD", 20 | "TELEMETRY_TEST_CATEGORICAL", 21 | "TELEMETRY_TEST_CATEGORICAL_OPTOUT", 22 | "TELEMETRY_TEST_CATEGORICAL_NVALUES", 23 | "TELEMETRY_TEST_CATEGORICAL_EMPTY_LABELS", 24 | "TELEMETRY_TEST_KEYED_COUNT_INIT_NO_RECORD", 25 | "TELEMETRY_TEST_KEYED_FLAG", 26 | "TELEMETRY_TEST_KEYED_COUNT", 27 | "TELEMETRY_TEST_KEYED_BOOLEAN", 28 | "TELEMETRY_TEST_RELEASE_OPTOUT", 29 | "TELEMETRY_TEST_RELEASE_OPTIN", 30 | "TELEMETRY_TEST_KEYED_RELEASE_OPTIN", 31 | "TELEMETRY_TEST_KEYED_RELEASE_OPTOUT", 32 | "TELEMETRY_TEST_EXPONENTIAL", 33 | "TELEMETRY_TEST_LINEAR", 34 | "TELEMETRY_TEST_BOOLEAN", 35 | "TELEMETRY_TEST_EXPIRED", 36 | "TELEMETRY_TEST_ALL_CHILDREN", 37 | "TELEMETRY_TEST_ALL_CHILDS", 38 | "EXPRESSION_IN_LOW_HIGH_ATTRIBUTE", 39 |
"NON_INTEGER_IN_HIGH_ATTRIBUTE", 40 | "HISTOGRAM_WITH_MULTISTORE", 41 | ] 42 | 43 | USE_COUNTERS = [ 44 | "USE_COUNTER2_SVGSVGELEMENT_GETELEMENTBYID_DOCUMENT", 45 | "USE_COUNTER2_SVGSVGELEMENT_GETELEMENTBYID_PAGE", 46 | "USE_COUNTER2_SVGSVGELEMENT_CURRENTSCALE_getter_DOCUMENT", 47 | "USE_COUNTER2_SVGSVGELEMENT_CURRENTSCALE_getter_PAGE", 48 | "USE_COUNTER2_SVGSVGELEMENT_CURRENTSCALE_setter_DOCUMENT", 49 | "USE_COUNTER2_SVGSVGELEMENT_CURRENTSCALE_setter_PAGE", 50 | "USE_COUNTER2_PROPERTY_FILL_DOCUMENT", 51 | "USE_COUNTER2_PROPERTY_FILL_PAGE", 52 | ] 53 | 54 | DEPRECATED_OPERATIONS = [ 55 | "USE_COUNTER2_DEPRECATED_GetAttributeNode_DOCUMENT", 56 | "USE_COUNTER2_DEPRECATED_GetAttributeNode_PAGE", 57 | "USE_COUNTER2_DEPRECATED_SetAttributeNode_DOCUMENT", 58 | "USE_COUNTER2_DEPRECATED_SetAttributeNode_PAGE", 59 | ] 60 | 61 | # Parse the histograms from the test definitions. 62 | parser = HistogramsParser() 63 | parsed_histograms = parser.parse(FILES, version) 64 | 65 | # Check that all expected histogram keys are present. 66 | ALL_KEYS = HISTOGRAMS + USE_COUNTERS + DEPRECATED_OPERATIONS 67 | assert set(ALL_KEYS) == set(parsed_histograms.keys()) 68 | 69 | # Make sure each of them contains all the required fields and details. 70 | REQUIRED_FIELDS = [ 71 | "cpp_guard", 72 | "description", 73 | "details", 74 | "expiry_version", 75 | "optout", 76 | "bug_numbers", 77 | ] 78 | 79 | REQUIRED_DETAILS = [ 80 | "low", 81 | "high", 82 | "keyed", 83 | "kind", 84 | "n_buckets", 85 | "record_in_processes", 86 | "record_into_store", 87 | ] 88 | 89 | for name, data in parsed_histograms.items(): 90 | assert is_string(name) 91 | 92 | # Check that we have all the required fields for each probe. 93 | for field in REQUIRED_FIELDS: 94 | assert field in data 95 | 96 | # Check that we have all the needed details. 97 | for field in REQUIRED_DETAILS: 98 | assert field in data["details"] 99 | 100 | # If multiple stores set, they should be both listed 101 | if name == "HISTOGRAM_WITH_MULTISTORE": 102 | assert ["main", "store2"] == data["details"]["record_into_store"] 103 | else: 104 | # Default multistore if unspecified is just "main" 105 | assert ["main"] == data["details"]["record_into_store"] 106 | 107 | # Categorical histograms should have a non-empty `details["labels"]`. 108 | if data["details"]["kind"] == "categorical": 109 | assert "labels" in data["details"].keys() and isinstance( 110 | data["details"]["labels"], list 111 | ) 112 | else: 113 | assert "labels" not in data["details"].keys() 114 | 115 | if name.startswith("USE_COUNTER2_"): 116 | assert data["optout"] == usecounter_optout 117 | 118 | 119 | # Test for an old Firefox version. 
120 | def test_histogram_parser_old(): 121 | histogram_parser("55", usecounter_optout=False) 122 | 123 | 124 | # Test for a newer Firefox version with Use Counters on release 125 | def test_histogram_parser_new(): 126 | histogram_parser("70", usecounter_optout=True) 127 | -------------------------------------------------------------------------------- /tests/test_library_refs.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | def test_library_refs(): 5 | with open("repositories.yaml", "r") as yaml_file: 6 | repositories = yaml.safe_load(yaml_file) 7 | libs = set() 8 | for library in repositories["libraries"]: 9 | for variant in library["variants"]: 10 | libs.add(variant["dependency_name"]) 11 | for app in repositories["applications"]: 12 | missing_libs = set(app["dependencies"]) - libs 13 | if missing_libs: 14 | raise KeyError( 15 | f'application {app["app_name"]} contains invalid library references: {missing_libs}' 16 | ) 17 | -------------------------------------------------------------------------------- /tests/test_metrics_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from probe_scraper.parsers.metrics import GleanMetricsParser 4 | 5 | 6 | def is_string(s): 7 | return isinstance(s, str) 8 | 9 | 10 | def test_metrics_parser(): 11 | # Parse the metrics from the test definitions. 12 | parser = GleanMetricsParser() 13 | parsed_metrics, errs = parser.parse(["tests/resources/metrics.yaml"], {}) 14 | 15 | assert errs == [] 16 | 17 | # Make sure we loaded all the metrics. 18 | # Notably, we do not check the contents; that is left up to the 19 | # glean parser to handle. 20 | assert len(parsed_metrics) == 2 21 | for name, data in parsed_metrics.items(): 22 | assert is_string(name) 23 | 24 | # Check that ping names are normalized 25 | assert "session-end" in parsed_metrics["example.os"]["send_in_pings"] 26 | 27 | 28 | def test_source_url(): 29 | parser = GleanMetricsParser() 30 | parsed_metrics, errs = parser.parse( 31 | ["tests/resources/metrics.yaml"], {}, "https://www.test.com/foo", "tests" 32 | ) 33 | 34 | assert ( 35 | parsed_metrics["example.duration"]["source_url"] 36 | == "https://www.test.com/foo/blob/tests/resources/metrics.yaml#L4" 37 | ) 38 | assert ( 39 | parsed_metrics["example.os"]["source_url"] 40 | == "https://www.test.com/foo/blob/tests/resources/metrics.yaml#L19" 41 | ) 42 | with pytest.raises(KeyError): 43 | parsed_metrics["example.os"]["defined_in"] 44 | -------------------------------------------------------------------------------- /tests/test_moz_central_scraper.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | import pytest 5 | 6 | from probe_scraper.scrapers import moz_central_scraper 7 | 8 | 9 | def test_extract_major_version(): 10 | assert moz_central_scraper.extract_major_version("62.0a1") == 62 11 | assert moz_central_scraper.extract_major_version("63.0.2") == 63 12 | with pytest.raises(Exception): 13 | moz_central_scraper.extract_major_version("helloworld") 14 | 15 | 16 | def path_is_in_version(path, version): 17 | return moz_central_scraper.relative_path_is_in_version(path, version) 18 | 19 | 20 | @pytest.mark.web_dependency 21 | def test_channel_revisions(): 22 | tmp_dir = "./.test-files" 23 | min_fx_version = 62 24 | max_fx_version = 62 25 | channel = "release" 26 | revision = "c9ed11ae5c79df3dcb69075e1c9da0317d1ecb1b" 27
| 28 | res = moz_central_scraper.scrape_channel_revisions( 29 | tmp_dir, min_fx_version, max_fx_version=max_fx_version, channels=[channel] 30 | ) 31 | 32 | registries = { 33 | probe_type: [ 34 | os.path.join(tmp_dir, "hg", revision, path) 35 | for path in paths 36 | if path_is_in_version(path, 62) 37 | ] 38 | for probe_type, paths in moz_central_scraper.REGISTRY_FILES.items() 39 | } 40 | 41 | record = { 42 | "date": datetime(2018, 10, 1, 18, 40, 35), 43 | "version": 62, 44 | "registries": registries, 45 | } 46 | 47 | assert res[channel][revision] == record 48 | -------------------------------------------------------------------------------- /tests/test_repositories_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import jsonschema 5 | import pytest 6 | import yaml 7 | 8 | from probe_scraper.parsers.repositories import RepositoriesParser 9 | 10 | 11 | def write_to_temp_file(data): 12 | fd, path = tempfile.mkstemp() 13 | with os.fdopen(fd, "w") as tmp: 14 | tmp.write(yaml.dump(data)) 15 | return path 16 | 17 | 18 | @pytest.fixture 19 | def parser(): 20 | return RepositoriesParser() 21 | 22 | 23 | @pytest.fixture 24 | def incorrect_repos_file(): 25 | data = { 26 | "some-repo": { 27 | # missing `notification_emails` 28 | "app_id": "mobile-metrics-example", 29 | "description": "foo", 30 | "url": "www.github.com/fbertsch/mobile-metrics-example", 31 | "metrics_files": ["metrics.yaml"], 32 | } 33 | } 34 | 35 | return write_to_temp_file(data) 36 | 37 | 38 | @pytest.fixture 39 | def correct_repos_file(): 40 | data = { 41 | "test-repo": { 42 | "app_id": "mobile-metrics-example", 43 | "description": "foo", 44 | "channel": "release", 45 | "url": "www.github.com/fbertsch/mobile-metrics-example", 46 | "notification_emails": ["frank@mozilla.com"], 47 | "metrics_files": ["metrics.yaml"], 48 | } 49 | } 50 | 51 | return write_to_temp_file(data) 52 | 53 | 54 | @pytest.fixture 55 | def invalid_release_channel_file(): 56 | data = { 57 | "test-repo": { 58 | "app_id": "mobile-metrics-example", 59 | "description": "foo", 60 | "channel": "releaze", 61 | "url": "www.github.com/fbertsch/mobile-metrics-example", 62 | "notification_emails": ["frank@mozilla.com"], 63 | "metrics_files": ["metrics.yaml"], 64 | } 65 | } 66 | 67 | return write_to_temp_file(data) 68 | 69 | 70 | def test_repositories(parser): 71 | parser.validate() 72 | 73 | 74 | def test_repositories_parser_incorrect(parser, incorrect_repos_file): 75 | with pytest.raises(jsonschema.exceptions.ValidationError): 76 | parser.validate(incorrect_repos_file) 77 | 78 | 79 | def test_repositories_parser_invalid_channel(parser, invalid_release_channel_file): 80 | with pytest.raises(jsonschema.exceptions.ValidationError): 81 | parser.validate(invalid_release_channel_file) 82 | 83 | 84 | def test_repositories_class(parser, correct_repos_file): 85 | repos = parser.parse(correct_repos_file) 86 | 87 | assert len(repos) == 1 88 | assert set(repos[0].get_metrics_file_paths()) == {"metrics.yaml"} 89 | assert repos[0].to_dict() == { 90 | "app_id": "mobile-metrics-example", 91 | "channel": "release", 92 | "dependencies": [], 93 | "deprecated": False, 94 | "description": "foo", 95 | "metrics_file_paths": ["metrics.yaml"], 96 | "name": "test-repo", 97 | "notification_emails": ["frank@mozilla.com"], 98 | "ping_file_paths": [], 99 | "prototype": False, 100 | "tag_file_paths": [], 101 | "url": "www.github.com/fbertsch/mobile-metrics-example", 102 | "skip_documentation": False, 103 | 
"moz_pipeline_metadata_defaults": {}, 104 | "moz_pipeline_metadata": {}, 105 | } 106 | -------------------------------------------------------------------------------- /tests/test_scalar_parser.py: -------------------------------------------------------------------------------- 1 | from probe_scraper.parsers.scalars import ScalarsParser 2 | 3 | 4 | def is_string(s): 5 | return isinstance(s, str) 6 | 7 | 8 | def test_scalar_parser(): 9 | # Parse the histograms from the test definitions. 10 | parser = ScalarsParser() 11 | parsed_scalars = parser.parse(["tests/resources/test_scalars.yaml"], "55") 12 | 13 | # Make sure we loaded all the scalars. 14 | assert len(parsed_scalars) == 17 15 | 16 | # Make sure each of them contains all the required fields and details. 17 | REQUIRED_FIELDS = [ 18 | "cpp_guard", 19 | "description", 20 | "details", 21 | "expiry_version", 22 | "optout", 23 | "bug_numbers", 24 | ] 25 | REQUIRED_DETAILS = ["keyed", "kind", "record_in_processes", "record_into_store"] 26 | 27 | for name, data in parsed_scalars.items(): 28 | assert is_string(name) 29 | 30 | # Make sure we have all the required fields and details. 31 | for field in REQUIRED_FIELDS: 32 | assert field in data 33 | 34 | for field in REQUIRED_DETAILS: 35 | assert field in data["details"] 36 | 37 | # If multiple stores set, they should be both listed 38 | if name == "other.test.multistore_probe": 39 | assert ["main", "store2"] == data["details"]["record_into_store"] 40 | else: 41 | # Default multistore if unspecified is just "main" 42 | assert ["main"] == data["details"]["record_into_store"] 43 | --------------------------------------------------------------------------------