├── .eslintrc.json ├── .github ├── dependabot.yml └── workflows │ ├── test.yml │ └── update_gh_pages.yml ├── .gitignore ├── .nvmrc ├── LICENSE.md ├── README.md ├── bad-ranges.js ├── browser-specific-failures.js ├── build.sh ├── cache └── .gitignore ├── deploy.sh ├── git-write.js ├── interop-scoring ├── category-data.json └── main.js ├── lib ├── browser-specific.js ├── index.js ├── results.js └── runs.js ├── package-lock.json ├── package.json └── test ├── bad-ranges.js └── browser-specific.js /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "commonjs": true, 4 | "es6": true, 5 | "node": true 6 | }, 7 | "extends": [ 8 | "google" 9 | ], 10 | "globals": { 11 | "Atomics": "readonly", 12 | "SharedArrayBuffer": "readonly" 13 | }, 14 | "parserOptions": { 15 | "ecmaVersion": 2018 16 | }, 17 | "rules": { 18 | "arrow-parens": [ 19 | 2, 20 | "as-needed" 21 | ], 22 | "require-jsdoc": 0 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: npm 4 | directory: / 5 | schedule: 6 | interval: monthly 7 | ignore: 8 | - dependency-name: "chai" 9 | update-types: ["version-update:semver-major"] 10 | - dependency-name: "eslint" 11 | update-types: ["version-update:semver-major"] 12 | - dependency-name: "flags" 13 | update-types: ["version-update:semver-minor"] 14 | - dependency-name: "node-fetch" 15 | update-types: ["version-update:semver-major"] 16 | 17 | - package-ecosystem: github-actions 18 | directory: / 19 | schedule: 20 | interval: monthly 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: PR 2 | on: pull_request 3 | jobs: 4 | test: 5 | runs-on: ubuntu-22.04 6 | steps: 7 | - uses: actions/checkout@v4 8 | 9 | - uses: awalsh128/cache-apt-pkgs-action@v1 10 | with: 11 | packages: libssl-dev libkrb5-dev 12 | version: 1.0 13 | 14 | - name: Setup Node.js 15 | uses: actions/setup-node@v4 16 | with: 17 | node-version: '18' 18 | cache: 'npm' 19 | 20 | - name: Install dependencies 21 | run: npm ci 22 | 23 | - name: Run tests 24 | run: npm run test-unit 25 | 26 | - name: Run lint 27 | run: npm run lint 28 | -------------------------------------------------------------------------------- /.github/workflows/update_gh_pages.yml: -------------------------------------------------------------------------------- 1 | # This needs to stay in sync with the results.yml workflow in results-analysis, 2 | # since both repos check out results-analysis and run the same code. This setup 3 | # is to avoid having a secret access token to allow a workflow in one repo to 4 | # push to the other repo. 5 | name: Update gh-pages 6 | on: 7 | # Trigger every three hours, or manually. 
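  # (The cron expression below, '20 */3 * * *', fires at minute 20 past every
  # third hour, in UTC.)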
8 | schedule: 9 | - cron: '20 */3 * * *' 10 | workflow_dispatch: 11 | jobs: 12 | update-gh-pages: 13 | runs-on: ubuntu-22.04 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - uses: awalsh128/cache-apt-pkgs-action@v1 18 | with: 19 | packages: libssl-dev libkrb5-dev 20 | version: 1.0 21 | 22 | - name: Setup Node.js 23 | uses: actions/setup-node@v4 24 | with: 25 | node-version: '18' 26 | cache: 'npm' 27 | 28 | - name: Install dependencies 29 | run: npm ci 30 | 31 | - name: Build 32 | run: ./build.sh 33 | 34 | - name: Deploy to gh-pages/ 35 | uses: JamesIves/github-pages-deploy-action@releases/v3 36 | with: 37 | ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} 38 | BRANCH: gh-pages 39 | FOLDER: out 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out/ 2 | gh-pages/ 3 | 4 | node_modules/ 5 | results-analysis-cache.git/ 6 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 18 2 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The 3-Clause BSD License 2 | 3 | Copyright © Google LLC 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WPT Results Analysis 2 | 3 | This repository contains a set of scripts for doing analysis on results from 4 | runs of [web-platform-tests](https://web-platform-tests.org/) uploaded to 5 | [wpt.fyi](https://wpt.fyi). It utilizes a git repository, 6 | [results-analysis-cache](https://github.com/web-platform-tests/results-analysis-cache), 7 | as a storage and compression mechanism for the results data. 8 | 9 | The analysis files are expected to be run via `node`, and should be 10 | independently documented inside the file. 
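For example, a typical invocation looks like the following (the dates here are
only illustrative; see each script's `--help` output and the sections below for
the full set of flags):

```
node browser-specific-failures.js --from=2023-01-01 --to=2024-01-01
```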
11 | 12 | ### gh-pages 13 | 14 | Metrics are regularly updated and pushed to the `gh-pages` branch using GitHub Actions, see [update_gh_pages.yml](.github/workflows/update_gh_pages.yml). 15 | 16 | ## Setup 17 | 18 | Run `npm install`; this will install any necessary dependencies and clone the 19 | results-analysis-cache repo locally. 20 | 21 | ### results-analysis-cache repository 22 | 23 | This repository stores results from WPT runs as a flat forest. Each run is an 24 | orphan commit with no parent, and is tagged with the run id. This allows for an 25 | excellent compression ratio, whilst still having reasonable lookup time. 26 | 27 | The repository can be updated via `git-write.js`, though this happens 28 | automatically upstream (via a cronjob). As such, to fetch new runs you should 29 | just `cd` into the results-analysis-cache directory and run `git pull`. 30 | 31 | ## Running the scripts 32 | 33 | ### browser-specific-failures.js 34 | 35 | ``` 36 | $ node browser-specific-failures.js --help 37 | Usage: node browser-specific-failures.js [options] 38 | 39 | Options: 40 | --from: Starting date (inclusive) 41 | (default: "2018-07-01") 42 | --to: Ending date (exclusive) 43 | (default: "2020-05-21") 44 | --products: Browsers to compare. Must match the products used on 45 | wpt.fyi 46 | (default: ["chrome","firefox","safari"]) 47 | --output: Output CSV file to write to. Defaults to {stable, 48 | experimental}-browser-specific-failures.csv 49 | (default: null) 50 | --[no]experimental: Calculate metrics for experimental runs. 51 | (default: false) 52 | ``` 53 | 54 | ### Generating end-of-year interop scores 55 | 56 | The scores used on the current year of the Interop Dashboard are generated 57 | regularly from this repository. At the end of each year, the scoring script 58 | should be run to generate final CSV files to use as the static final numbers 59 | for that interop year. Both stable and experimental numbers should be generated. 60 | 61 | ``` 62 | $ node interop-scoring/main.js --year={{YEAR}} --to={{YEAR + 1}}-01-01 63 | $ node interop-scoring/main.js --year={{YEAR}} --to={{YEAR + 1}}-01-01 --experimental 64 | ``` 65 | 66 | This will generate two files with final year numbers to use. These files should 67 | be added to the `webapp/static` directory of the 68 | [wpt.fyi repository](https://github.com/web-platform-tests/wpt.fyi). The 69 | `csv_url` property should be updated for that year in 70 | [webapp/components/interop-data.js](https://github.com/web-platform-tests/wpt.fyi/blob/main/webapp/components/interop-data.js) 71 | to match the new file locations. 72 | -------------------------------------------------------------------------------- /bad-ranges.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const moment = require('moment'); 4 | 5 | // There have been periods where results cannot be considered valid and 6 | // contribute noise to the metrics. These date ranges are listed below, with 7 | // inclusive start dates and exclusive end dates. 8 | 9 | const STABLE_BAD_RANGES = [ 10 | // This was a safaridriver outage, resolved by 11 | // https://github.com/web-platform-tests/wpt/pull/15659 12 | [moment('2019-02-06'), moment('2019-03-09')], 13 | // This was a safaridriver outage, resolved by 14 | // https://github.com/web-platform-tests/wpt/pull/18585 15 | [moment('2019-06-27'), moment('2019-08-23')], 16 | // This was a general outage due to the Taskcluster Checks migration. 
17 | [moment('2020-07-08'), moment('2020-07-16')], 18 | // This was a Firefox outage which produced only partial test results. 19 | [moment('2020-07-21'), moment('2020-08-15')], 20 | // This was a regression from https://github.com/web-platform-tests/wpt/pull/29089, 21 | // fixed by https://github.com/web-platform-tests/wpt/pull/32540 22 | [moment('2022-01-25'), moment('2022-01-27')], 23 | // This was a very much incomplete Safari run. 24 | [moment('2023-07-17'), moment('2023-07-18')], 25 | // Safari got a lot of broken screenshots. 26 | // https://bugs.webkit.org/show_bug.cgi?id=262078 27 | [moment('2023-09-20'), moment('2023-09-21')], 28 | ]; 29 | 30 | const EXPERIMENTAL_BAD_RANGES = [ 31 | // This was a safaridriver outage, resolved by 32 | // https://github.com/web-platform-tests/wpt/pull/18585 33 | [moment('2019-06-27'), moment('2019-08-23')], 34 | // Bad Firefox run: 35 | // https://wpt.fyi/results/?diff&filter=ADC&run_id=387040002&run_id=404070001 36 | [moment('2019-12-25'), moment('2019-12-26')], 37 | // This was a general outage due to the Taskcluster Checks migration. 38 | [moment('2020-07-08'), moment('2020-07-16')], 39 | // Bad Chrome run: 40 | // https://wpt.fyi/results/?diff&filter=ADC&run_id=622910001&run_id=634430001 41 | [moment('2020-07-31'), moment('2020-08-01')], 42 | // Something went wrong with the Firefox run on this date. 43 | [moment('2021-03-08'), moment('2021-03-09')], 44 | // This was a regression from https://github.com/web-platform-tests/wpt/pull/29089, 45 | // fixed by https://github.com/web-platform-tests/wpt/pull/32540 46 | [moment('2022-01-25'), moment('2022-01-27')], 47 | // These were very much incomplete Safari runs. 48 | [moment('2023-09-02'), moment('2023-09-03')], 49 | [moment('2023-09-11'), moment('2023-09-12')], 50 | [moment('2023-09-20'), moment('2023-09-21')], 51 | [moment('2023-09-22'), moment('2023-09-23')], 52 | [moment('2024-01-29'), moment('2024-01-30')], 53 | ]; 54 | 55 | // Advances date to the end of a bad range if it's in a bad range, and otherwise 56 | // returns the same date value. 57 | function advanceDateToSkipBadDataIfNecessary(date, experimental) { 58 | const ranges = experimental ? EXPERIMENTAL_BAD_RANGES : STABLE_BAD_RANGES; 59 | for (const range of ranges) { 60 | if (date >= range[0] && date < range[1]) { 61 | console.log(`Skipping from ${date.format('YYYY-MM-DD')} to ` + 62 | `${range[1].format('YYYY-MM-DD')} due to bad data`); 63 | return range[1]; 64 | } 65 | } 66 | return date; 67 | } 68 | 69 | 70 | module.exports = {advanceDateToSkipBadDataIfNecessary}; 71 | -------------------------------------------------------------------------------- /browser-specific-failures.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Implements a view of how many browser specific failures each engine has over 5 | * time. 6 | */ 7 | 8 | const fs = require('fs'); 9 | const flags = require('flags'); 10 | const Git = require('nodegit'); 11 | const lib = require('./lib'); 12 | const moment = require('moment'); 13 | 14 | flags.defineString('from', '2018-07-01', 'Starting date (inclusive)'); 15 | flags.defineString('to', moment().format('YYYY-MM-DD'), 16 | 'Ending date (exclusive)'); 17 | flags.defineStringList('products', ['chrome', 'firefox', 'safari'], 18 | 'Browsers to compare. Must match the products used on wpt.fyi'); 19 | flags.defineString('output', null, 20 | 'Output CSV file to write to. 
Defaults to ' + 21 | '{stable, experimental}-browser-specific-failures.csv'); 22 | flags.defineBoolean('experimental', false, 23 | 'Calculate metrics for experimental runs.'); 24 | flags.parse(); 25 | 26 | 27 | async function main() { 28 | // Sort the products so that output files are consistent. 29 | const products = flags.get('products'); 30 | if (products.length < 2) { 31 | throw new Error('At least 2 products must be specified for this analysis'); 32 | } 33 | products.sort(); 34 | 35 | const repo = await Git.Repository.open('results-analysis-cache.git'); 36 | 37 | // First, grab aligned runs from the server for the dates that we are 38 | // interested in. 39 | const from = moment(flags.get('from')); 40 | const to = moment(flags.get('to')); 41 | const experimental = flags.get('experimental'); 42 | const alignedRuns = await lib.runs.fetchAlignedRunsFromServer( 43 | products, from, to, experimental); 44 | 45 | // Verify that we have data for the fetched runs in the results-analysis-cache 46 | // repo. 47 | console.log('Getting local set of run ids from repo'); 48 | let before = Date.now(); 49 | const localRunIds = await lib.results.getLocalRunIds(repo); 50 | let after = Date.now(); 51 | console.log(`Found ${localRunIds.size} ids (took ${after - before} ms)`); 52 | 53 | let hadErrors = false; 54 | for (const [date, runs] of alignedRuns.entries()) { 55 | for (const run of runs) { 56 | if (!localRunIds.has(run.id)) { 57 | // If you see this, you probably need to run git-write.js or just update 58 | // your results-analysis-cache.git repo; see the README.md. 59 | console.error(`Run ${run.id} missing from local git repo (${date})`); 60 | hadErrors = true; 61 | } 62 | } 63 | } 64 | if (hadErrors) { 65 | throw new Error('Missing data for some runs (see errors logged above). ' + 66 | 'Try running "git fetch --all --tags" in results-analysis-cache/'); 67 | } 68 | 69 | // Load the test result trees into memory; creates a list of recursive tree 70 | // structures: tree = { trees: [...], tests: [...] }. Each 'tree' represents a 71 | // directory, each 'test' is the results from a given test file. 72 | console.log('Iterating over all runs, loading test results'); 73 | before = Date.now(); 74 | for (const runs of alignedRuns.values()) { 75 | for (const run of runs) { 76 | // Just in case someone ever adds a 'tree' field to the JSON. 77 | if (run.tree) { 78 | throw new Error('Run JSON contains "tree" field; code needs changed.'); 79 | } 80 | run.tree = await lib.results.getGitTree(repo, run); 81 | } 82 | } 83 | after = Date.now(); 84 | console.log(`Loading ${alignedRuns.size} sets of runs took ` + 85 | `${after - before} ms`); 86 | 87 | // We're ready to score the runs now! 88 | console.log('Calculating browser-specific failures for the runs'); 89 | before = Date.now(); 90 | const dateToScores = new Map(); 91 | for (const [date, runs] of alignedRuns.entries()) { 92 | // The SHA should be the same for all runs, so just grab the first. 93 | const sha = runs[0].full_revision_hash; 94 | const versions = runs.map(run => run.browser_version); 95 | try { 96 | const scores = lib.browserSpecific.scoreBrowserSpecificFailures( 97 | runs, new Set(products)); 98 | dateToScores.set(date, {sha, versions, scores}); 99 | } catch (e) { 100 | e.message += `\n\tRuns: ${runs.map(r => r.id)}`; 101 | throw e; 102 | } 103 | } 104 | after = Date.now(); 105 | console.log(`Done scoring (took ${after - before} ms)`); 106 | 107 | // Finally, time to dump stuff. 
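  // The CSV written below has one row per aligned-run date; with the default
  // products its header is:
  //   sha,date,chrome-version,chrome,firefox-version,firefox,safari-version,safari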
108 | let outputFilename = flags.get('output'); 109 | if (!outputFilename) { 110 | outputFilename = experimental ? 111 | 'experimental-browser-specific-failures.csv' : 112 | 'stable-browser-specific-failures.csv'; 113 | } 114 | 115 | console.log(`Writing data to ${outputFilename}`); 116 | 117 | let data = 'sha,date'; 118 | for (const product of products) { 119 | data += `,${product}-version,${product}`; 120 | } 121 | data += '\n'; 122 | 123 | // ES6 maps iterate in insertion order, and we initially inserted in date 124 | // order, so we can just iterate |dateToScores|. 125 | for (const [date, shaAndScores] of dateToScores) { 126 | const sha = shaAndScores.sha; 127 | const scores = shaAndScores.scores; 128 | const versions = shaAndScores.versions; 129 | if (!scores) { 130 | console.log(`ERROR: ${date} had no scores`); 131 | continue; 132 | } 133 | const csvRecord = [ 134 | sha, 135 | date.substr(0, 10), 136 | ]; 137 | for (let i = 0; i < products.length; i++) { 138 | csvRecord.push(versions[i]); 139 | csvRecord.push(scores.get(products[i])); 140 | } 141 | data += csvRecord.join(',') + '\n'; 142 | } 143 | await fs.promises.writeFile(outputFilename, data, 'utf-8'); 144 | } 145 | 146 | main().catch(reason => { 147 | console.error(reason); 148 | process.exit(1); 149 | }); 150 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | rm -rf out/ 8 | mkdir -p out/data/ 9 | 10 | echo "Updating results-analysis-cache.git/" 11 | cd results-analysis-cache.git/ 12 | git fetch --all --tags 13 | cd ../ 14 | 15 | # Scoring scripts may require more memory than the default. 
16 | export NODE_OPTIONS="--max-old-space-size=8192" 17 | 18 | TO_DATE=$(date -d "tomorrow 13:00" '+%Y-%m-%d') 19 | 20 | node git-write.js --max-time=300 --max-age-days=5 21 | 22 | update_bsf_csv() { 23 | local OUTPUT="${1}" 24 | 25 | local FROM_DATE="2018-06-01" 26 | local EXPERIMENTAL_FLAG="" 27 | if [[ $1 == *"experimental"* ]]; then 28 | EXPERIMENTAL_FLAG="--experimental" 29 | fi 30 | 31 | node browser-specific-failures.js \ 32 | ${EXPERIMENTAL_FLAG} --from=${FROM_DATE} --to=${TO_DATE} \ 33 | --output=${OUTPUT} 34 | } 35 | 36 | update_bsf_csv out/data/stable-browser-specific-failures.csv 37 | update_bsf_csv out/data/experimental-browser-specific-failures.csv 38 | 39 | update_interop_year() { 40 | local YEAR="${1}" 41 | local PRODUCTS="${2}" 42 | 43 | mkdir -p out/data/interop-${YEAR}/ 44 | node interop-scoring/main.js --year=${YEAR} --to=${TO_DATE} --products=${PRODUCTS} 45 | node interop-scoring/main.js --year=${YEAR} --to=${TO_DATE} --products=${PRODUCTS} --experimental 46 | 47 | mv interop-${YEAR}-*.csv out/data/interop-${YEAR}/ 48 | } 49 | 50 | update_interop_year 2025 chrome,edge,firefox,safari 51 | -------------------------------------------------------------------------------- /cache/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | COMMIT="$(git rev-parse HEAD)" 8 | 9 | rm -rf gh-pages 10 | git clone --branch gh-pages git@github.com:web-platform-tests/results-analysis gh-pages 11 | 12 | rm -rf gh-pages/* 13 | cp -r out/* gh-pages/ 14 | 15 | cd gh-pages/ 16 | 17 | git add -A 18 | git commit -m "Updating graphs" -m "Using commit $COMMIT" 19 | git push 20 | -------------------------------------------------------------------------------- /git-write.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-len */ 2 | 3 | 'use strict'; 4 | 5 | const fetch = require('node-fetch'); 6 | const flags = require('flags'); 7 | const moment = require('moment'); 8 | const Git = require('nodegit'); 9 | const runs = require('./lib/runs'); 10 | 11 | flags.defineInteger('max-runs', 0, 'Write at most this many runs'); 12 | flags.defineInteger('max-time', 0, 'Run for at most this many seconds'); 13 | flags.defineInteger('max-age-days', 0, 'Don\'t process any runs older than this many days'); 14 | flags.parse(); 15 | 16 | async function writeRunToGit(run, repo) { 17 | const tagName = `run/${run.id}/results`; 18 | try { 19 | await repo.getReference(`refs/tags/${tagName}`); 20 | return false; 21 | } catch (e) {} 22 | 23 | const reportURL = run.raw_results_url; 24 | console.log(`Fetching ${reportURL}`); 25 | const report = await (await fetch(reportURL)).json(); 26 | await writeReportToGit(report, repo, reportURL, tagName); 27 | console.log(`Wrote ${tagName}`); 28 | return true; 29 | } 30 | 31 | async function writeReportToGit(report, repo, commitMessage, tagName) { 32 | // Create a tree of Treebuilders. When all the files have been written, this 33 | // tree is traversed depth first to write all of the trees. 
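  // Illustrative example: for a report containing /css/foo.html, the root node
  // ends up as
  //   { builder, subtrees: Map { 'css' => { builder, subtrees: Map {} } } }
  // with the foo.html.json blob inserted via the 'css' node's builder.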
34 | async function emptyTree() { 35 | const builder = await Git.Treebuilder.create(repo, null); 36 | return {builder, subtrees: new Map}; 37 | } 38 | 39 | const rootTree = await emptyTree(); 40 | 41 | async function getTree(dirs) { 42 | let tree = rootTree; 43 | for (let i = 0; i < dirs.length; i++) { 44 | const dir = dirs[i]; 45 | let subtree = tree.subtrees.get(dir); 46 | if (!subtree) { 47 | subtree = await emptyTree(); 48 | tree.subtrees.set(dir, subtree); 49 | } 50 | tree = subtree; 51 | } 52 | return tree; 53 | } 54 | 55 | async function writeTree(tree) { 56 | for (const [dir, subtree] of tree.subtrees.entries()) { 57 | const oid = await writeTree(subtree); 58 | tree.builder.insert(dir, oid, Git.TreeEntry.FILEMODE.TREE); 59 | } 60 | return tree.builder.write(); 61 | } 62 | 63 | const blobCache = new Map; 64 | 65 | function isJSONObject(v) { 66 | return typeof v === 'object' && v !== null && !Array.isArray(v); 67 | } 68 | 69 | // Keep only a fixed set of keys. This filters out at least: 70 | // - "duration" which is different for every run 71 | // - "expected" which will always be "PASS" or "OK" for wpt.fyi runs 72 | // - "known_intermittent" which is for flaky expectations 73 | // - "message" which contains the failure reason 74 | // - "screenshots" which contains screenshot hashes 75 | // - "test" which is the test path, and will be represented elsewhere 76 | // Note that "" is the dummy key value for the initial object. 77 | const keepKeys = new Set(['', 'name', 'status', 'subtests']); 78 | 79 | function replacer(key, value) { 80 | // eslint-disable-next-line no-invalid-this 81 | if (isJSONObject(this)) { 82 | if (!keepKeys.has(key)) { 83 | return undefined; 84 | } 85 | } 86 | 87 | // If the value is null (often for "message"), just omit it. 88 | if (value === null) { 89 | return undefined; 90 | } 91 | 92 | // If the value is an empty array (often for "subtests"), just omit it. 93 | if (Array.isArray(value) && value.length === 0) { 94 | return undefined; 95 | } 96 | 97 | // Ensure that objects keys are sorted, as they would be if using 98 | // `json.dumps(value, sort_keys=True)` in Python. 99 | if (isJSONObject(value)) { 100 | const valueKeys = Object.keys(value); 101 | valueKeys.sort(); 102 | const sortedValue = {}; 103 | for (const valueKey of valueKeys) { 104 | sortedValue[valueKey] = value[valueKey]; 105 | } 106 | return sortedValue; 107 | } 108 | 109 | return value; 110 | } 111 | 112 | for (const test of report.results) { 113 | const json = JSON.stringify(test, replacer); 114 | 115 | let blobId = blobCache.get(json); 116 | 117 | if (!blobId) { 118 | const buffer = Buffer.from(json); 119 | blobId = await Git.Blob.createFromBuffer(repo, buffer, buffer.length); 120 | blobCache.set(json, blobId); 121 | } 122 | 123 | const path = test.test; 124 | // Complexity to handle /foo/bar/test.html?a/b, which can occur especially 125 | // with variants. decodeURIComponent needs to be used when reading. 126 | const queryStart = path.indexOf('?'); 127 | const lastSlash = path.lastIndexOf('/', queryStart >= 0 ? 
queryStart : path.length); 128 | const dirname = path.substr(0, lastSlash); 129 | const filename = encodeURIComponent(path.substr(lastSlash + 1)); 130 | 131 | const dirs = dirname.split('/').filter(d => d); 132 | 133 | const tree = await getTree(dirs); 134 | tree.builder.insert(`${filename}.json`, blobId, Git.TreeEntry.FILEMODE.BLOB); 135 | } 136 | 137 | const oid = await writeTree(rootTree); 138 | 139 | const signature = Git.Signature.now('autofoolip', 'auto@foolip.org'); 140 | 141 | const commit = await repo.createCommit(null, signature, signature, commitMessage, oid, []); 142 | 143 | await repo.createLightweightTag(commit, tagName); 144 | } 145 | 146 | async function main() { 147 | // bare clone of https://github.com/web-platform-tests/results-analysis-cache 148 | const repo = await Git.Repository.init('results-analysis-cache.git', 1); 149 | 150 | const maxRuns = flags.get('max-runs'); 151 | const maxTime = flags.get('max-time'); 152 | const maxAgeDays = flags.get('max-age-days'); 153 | 154 | let totalRuns = 0; 155 | let writtenRuns = 0; 156 | const deadline = maxTime ? Date.now() + 1000 * maxTime : NaN; 157 | const maxAge = maxAgeDays ? moment().subtract(maxAgeDays, 'days') : null; 158 | 159 | const products = [ 160 | 'android_webview', 161 | 'chrome', 162 | 'chrome_android', 163 | 'chromium', 164 | 'deno', 165 | 'edge', 166 | 'epiphany', 167 | 'firefox', 168 | 'firefox_android', 169 | 'flow', 170 | 'node.js', 171 | 'safari', 172 | 'servo', 173 | 'uc', 174 | 'webkitgtk', 175 | 'wktr', 176 | ]; 177 | 178 | for (const product of products) { 179 | let productRuns = 0; 180 | let stop = false; 181 | for await (const run of runs.getIterator({product})) { 182 | productRuns++; 183 | totalRuns++; 184 | // Skip runs of affected tests for PRs. 185 | if (run.labels.some(l => l === 'pr_base' || l === 'pr_head')) { 186 | continue; 187 | } 188 | const didWrite = await writeRunToGit(run, repo); 189 | if (didWrite) { 190 | writtenRuns++; 191 | if (maxRuns && writtenRuns >= maxRuns) { 192 | console.log(`Stopping because limit of ${maxRuns} runs was reached`); 193 | stop = true; 194 | break; 195 | } 196 | } 197 | if (maxTime && Date.now() >= deadline) { 198 | console.log(`Stopping because limit of ${maxTime} seconds was reached`); 199 | stop = true; 200 | break; 201 | } 202 | if (maxAge && moment(run.created_at) < maxAge) { 203 | console.log(`Moving to next product because limit of ${maxAgeDays} days old was reached`); 204 | break; 205 | } 206 | } 207 | console.log(`Iterated ${productRuns} ${product} runs`); 208 | if (stop) { 209 | break; 210 | } 211 | } 212 | console.log(`Iterated ${totalRuns} runs in total`); 213 | } 214 | 215 | main().catch(reason => { 216 | console.error(reason); 217 | process.exit(1); 218 | }); 219 | -------------------------------------------------------------------------------- /interop-scoring/category-data.json: -------------------------------------------------------------------------------- 1 | { 2 | "2021": { 3 | "categories": [ 4 | { 5 | "name": "interop-2021-aspect-ratio", 6 | "labels": [ 7 | "interop-2021-aspect-ratio" 8 | ] 9 | }, 10 | { 11 | "name": "interop-2021-flexbox", 12 | "labels": [ 13 | "interop-2021-flexbox" 14 | ] 15 | }, 16 | { 17 | "name": "interop-2021-grid", 18 | "labels": [ 19 | "interop-2021-grid" 20 | ] 21 | }, 22 | { 23 | "name": "interop-2021-position-sticky", 24 | "labels": [ 25 | "interop-2021-position-sticky" 26 | ] 27 | }, 28 | { 29 | "name": "interop-2021-transforms", 30 | "labels": [ 31 | "interop-2021-transforms" 32 | ] 33 | } 34 | ] 35 | }, 36 | 
"2022": { 37 | "categories": [ 38 | { 39 | "name": "interop-2021-aspect-ratio", 40 | "labels": [ 41 | "interop-2021-aspect-ratio" 42 | ] 43 | }, 44 | { 45 | "name": "interop-2021-flexbox", 46 | "labels": [ 47 | "interop-2021-flexbox" 48 | ] 49 | }, 50 | { 51 | "name": "interop-2021-grid", 52 | "labels": [ 53 | "interop-2021-grid" 54 | ] 55 | }, 56 | { 57 | "name": "interop-2021-position-sticky", 58 | "labels": [ 59 | "interop-2021-position-sticky" 60 | ] 61 | }, 62 | { 63 | "name": "interop-2021-transforms", 64 | "labels": [ 65 | "interop-2021-transforms" 66 | ] 67 | }, 68 | { 69 | "name": "interop-2022-cascade", 70 | "labels": [ 71 | "interop-2022-cascade" 72 | ] 73 | }, 74 | { 75 | "name": "interop-2022-color", 76 | "labels": [ 77 | "interop-2022-color" 78 | ] 79 | }, 80 | { 81 | "name": "interop-2022-contain", 82 | "labels": [ 83 | "interop-2022-contain" 84 | ] 85 | }, 86 | { 87 | "name": "interop-2022-dialog", 88 | "labels": [ 89 | "interop-2022-dialog" 90 | ] 91 | }, 92 | { 93 | "name": "interop-2022-forms", 94 | "labels": [ 95 | "interop-2022-forms" 96 | ] 97 | }, 98 | { 99 | "name": "interop-2022-scrolling", 100 | "labels": [ 101 | "interop-2022-scrolling" 102 | ] 103 | }, 104 | { 105 | "name": "interop-2022-subgrid", 106 | "labels": [ 107 | "interop-2022-subgrid" 108 | ] 109 | }, 110 | { 111 | "name": "interop-2022-text", 112 | "labels": [ 113 | "interop-2022-text" 114 | ] 115 | }, 116 | { 117 | "name": "interop-2022-viewport", 118 | "labels": [ 119 | "interop-2022-viewport" 120 | ] 121 | }, 122 | { 123 | "name": "interop-2022-webcompat", 124 | "labels": [ 125 | "interop-2022-webcompat" 126 | ] 127 | } 128 | ] 129 | }, 130 | "2023": { 131 | "categories": [ 132 | { 133 | "name": "interop-2021-aspect-ratio", 134 | "labels": [ 135 | "interop-2021-aspect-ratio" 136 | ] 137 | }, 138 | { 139 | "name": "interop-2021-position-sticky", 140 | "labels": [ 141 | "interop-2021-position-sticky" 142 | ] 143 | }, 144 | { 145 | "name": "interop-2022-cascade", 146 | "labels": [ 147 | "interop-2022-cascade" 148 | ] 149 | }, 150 | { 151 | "name": "interop-2022-dialog", 152 | "labels": [ 153 | "interop-2022-dialog" 154 | ] 155 | }, 156 | { 157 | "name": "interop-2022-text", 158 | "labels": [ 159 | "interop-2022-text" 160 | ] 161 | }, 162 | { 163 | "name": "interop-2022-viewport", 164 | "labels": [ 165 | "interop-2022-viewport" 166 | ] 167 | }, 168 | { 169 | "name": "interop-2022-webcompat", 170 | "labels": [ 171 | "interop-2022-webcompat" 172 | ] 173 | }, 174 | { 175 | "name": "interop-2023-cssborderimage", 176 | "labels": [ 177 | "interop-2023-cssborderimage" 178 | ] 179 | }, 180 | { 181 | "name": "interop-2023-color", 182 | "labels": [ 183 | "interop-2022-color", 184 | "interop-2023-color" 185 | ] 186 | }, 187 | { 188 | "name": "interop-2023-container", 189 | "labels": [ 190 | "interop-2023-container" 191 | ] 192 | }, 193 | { 194 | "name": "interop-2023-contain", 195 | "labels": [ 196 | "interop-2022-contain", 197 | "interop-2023-contain" 198 | ] 199 | }, 200 | { 201 | "name": "interop-2023-pseudos", 202 | "labels": [ 203 | "interop-2023-pseudos" 204 | ] 205 | }, 206 | { 207 | "name": "interop-2023-property", 208 | "labels": [ 209 | "interop-2023-property" 210 | ] 211 | }, 212 | { 213 | "name": "interop-2023-flexbox", 214 | "labels": [ 215 | "interop-2021-flexbox", 216 | "interop-2023-flexbox" 217 | ] 218 | }, 219 | { 220 | "name": "interop-2023-fonts", 221 | "labels": [ 222 | "interop-2023-fonts" 223 | ] 224 | }, 225 | { 226 | "name": "interop-2023-forms", 227 | "labels": [ 228 | 
"interop-2022-forms", 229 | "interop-2023-forms" 230 | ] 231 | }, 232 | { 233 | "name": "interop-2023-grid", 234 | "labels": [ 235 | "interop-2021-grid", 236 | "interop-2023-grid" 237 | ] 238 | }, 239 | { 240 | "name": "interop-2023-has", 241 | "labels": [ 242 | "interop-2023-has" 243 | ] 244 | }, 245 | { 246 | "name": "interop-2023-inert", 247 | "labels": [ 248 | "interop-2023-inert" 249 | ] 250 | }, 251 | { 252 | "name": "interop-2023-cssmasking", 253 | "labels": [ 254 | "interop-2023-cssmasking" 255 | ] 256 | }, 257 | { 258 | "name": "interop-2023-mathfunctions", 259 | "labels": [ 260 | "interop-2023-mathfunctions" 261 | ] 262 | }, 263 | { 264 | "name": "interop-2023-mediaqueries", 265 | "labels": [ 266 | "interop-2023-mediaqueries" 267 | ] 268 | }, 269 | { 270 | "name": "interop-2023-modules", 271 | "labels": [ 272 | "interop-2023-modules" 273 | ] 274 | }, 275 | { 276 | "name": "interop-2023-motion", 277 | "labels": [ 278 | "interop-2023-motion" 279 | ] 280 | }, 281 | { 282 | "name": "interop-2023-offscreencanvas", 283 | "labels": [ 284 | "interop-2023-offscreencanvas" 285 | ] 286 | }, 287 | { 288 | "name": "interop-2023-events", 289 | "labels": [ 290 | "interop-2023-events" 291 | ] 292 | }, 293 | { 294 | "name": "interop-2022-scrolling", 295 | "labels": [ 296 | "interop-2022-scrolling" 297 | ] 298 | }, 299 | { 300 | "name": "interop-2022-subgrid", 301 | "labels": [ 302 | "interop-2022-subgrid" 303 | ] 304 | }, 305 | { 306 | "name": "interop-2021-transforms", 307 | "labels": [ 308 | "interop-2021-transforms" 309 | ] 310 | }, 311 | { 312 | "name": "interop-2023-url", 313 | "labels": [ 314 | "interop-2023-url" 315 | ] 316 | }, 317 | { 318 | "name": "interop-2023-webcodecs", 319 | "labels": [ 320 | "interop-2023-webcodecs" 321 | ] 322 | }, 323 | { 324 | "name": "interop-2023-webcomponents", 325 | "labels": [ 326 | "interop-2023-webcomponents" 327 | ] 328 | }, 329 | { 330 | "name": "interop-2023-webcompat", 331 | "labels": [ 332 | "interop-2023-webcompat" 333 | ] 334 | } 335 | ] 336 | }, 337 | "2024": { 338 | "categories": [ 339 | { 340 | "name": "interop-2024-accessibility", 341 | "labels": [ 342 | "interop-2024-accessibility" 343 | ] 344 | }, 345 | { 346 | "name": "interop-2024-starting-style-transition-behavior", 347 | "labels": [ 348 | "interop-2024-starting-style", 349 | "interop-2024-transition-behavior" 350 | ] 351 | }, 352 | { 353 | "name": "interop-2023-property", 354 | "labels": [ 355 | "interop-2023-property" 356 | ] 357 | }, 358 | { 359 | "name": "interop-2024-dsd", 360 | "labels": [ 361 | "interop-2024-dsd" 362 | ] 363 | }, 364 | { 365 | "name": "interop-2024-dir", 366 | "labels": [ 367 | "interop-2024-dir" 368 | ] 369 | }, 370 | { 371 | "name": "interop-2024-font-size-adjust", 372 | "labels": [ 373 | "interop-2024-font-size-adjust" 374 | ] 375 | }, 376 | { 377 | "name": "interop-2024-websockets", 378 | "labels": [ 379 | "interop-2024-websockets" 380 | ] 381 | }, 382 | { 383 | "name": "interop-2024-indexeddb", 384 | "labels": [ 385 | "interop-2024-indexeddb" 386 | ] 387 | }, 388 | { 389 | "name": "interop-2024-layout", 390 | "labels": [ 391 | "interop-2021-flexbox", 392 | "interop-2021-grid", 393 | "interop-2023-flexbox", 394 | "interop-2023-grid", 395 | "interop-2022-subgrid" 396 | ] 397 | }, 398 | { 399 | "name": "interop-2024-nesting", 400 | "labels": [ 401 | "interop-2024-nesting" 402 | ] 403 | }, 404 | { 405 | "name": "interop-2023-events", 406 | "labels": [ 407 | "interop-2023-events" 408 | ] 409 | }, 410 | { 411 | "name": "interop-2024-popover", 412 | "labels": [ 413 | 
"interop-2024-popover" 414 | ] 415 | }, 416 | { 417 | "name": "interop-2024-relative-color", 418 | "labels": [ 419 | "interop-2024-relative-color" 420 | ] 421 | }, 422 | { 423 | "name": "interop-2024-video-rvfc", 424 | "labels": [ 425 | "interop-2024-video-rvfc" 426 | ] 427 | }, 428 | { 429 | "name": "interop-2024-scrollbar", 430 | "labels": [ 431 | "interop-2024-scrollbar" 432 | ] 433 | }, 434 | { 435 | "name": "interop-2024-text-wrap", 436 | "labels": [ 437 | "interop-2024-text-wrap" 438 | ] 439 | }, 440 | { 441 | "name": "interop-2023-url", 442 | "labels": [ 443 | "interop-2023-url" 444 | ] 445 | }, 446 | { 447 | "name": "interop-2021-aspect-ratio", 448 | "labels": [ 449 | "interop-2021-aspect-ratio" 450 | ] 451 | }, 452 | { 453 | "name": "interop-2021-position-sticky", 454 | "labels": [ 455 | "interop-2021-position-sticky" 456 | ] 457 | }, 458 | { 459 | "name": "interop-2022-cascade", 460 | "labels": [ 461 | "interop-2022-cascade" 462 | ] 463 | }, 464 | { 465 | "name": "interop-2022-dialog", 466 | "labels": [ 467 | "interop-2022-dialog" 468 | ] 469 | }, 470 | { 471 | "name": "interop-2022-text", 472 | "labels": [ 473 | "interop-2022-text" 474 | ] 475 | }, 476 | { 477 | "name": "interop-2022-viewport", 478 | "labels": [ 479 | "interop-2022-viewport" 480 | ] 481 | }, 482 | { 483 | "name": "interop-2022-webcompat", 484 | "labels": [ 485 | "interop-2022-webcompat" 486 | ] 487 | }, 488 | { 489 | "name": "interop-2023-cssborderimage", 490 | "labels": [ 491 | "interop-2023-cssborderimage" 492 | ] 493 | }, 494 | { 495 | "name": "interop-2023-color", 496 | "labels": [ 497 | "interop-2022-color", 498 | "interop-2023-color" 499 | ] 500 | }, 501 | { 502 | "name": "interop-2023-container", 503 | "labels": [ 504 | "interop-2023-container" 505 | ] 506 | }, 507 | { 508 | "name": "interop-2023-contain", 509 | "labels": [ 510 | "interop-2022-contain", 511 | "interop-2023-contain" 512 | ] 513 | }, 514 | { 515 | "name": "interop-2023-pseudos", 516 | "labels": [ 517 | "interop-2023-pseudos" 518 | ] 519 | }, 520 | { 521 | "name": "interop-2023-fonts", 522 | "labels": [ 523 | "interop-2023-fonts" 524 | ] 525 | }, 526 | { 527 | "name": "interop-2023-forms", 528 | "labels": [ 529 | "interop-2022-forms", 530 | "interop-2023-forms" 531 | ] 532 | }, 533 | { 534 | "name": "interop-2023-has", 535 | "labels": [ 536 | "interop-2023-has" 537 | ] 538 | }, 539 | { 540 | "name": "interop-2023-inert", 541 | "labels": [ 542 | "interop-2023-inert" 543 | ] 544 | }, 545 | { 546 | "name": "interop-2023-cssmasking", 547 | "labels": [ 548 | "interop-2023-cssmasking" 549 | ] 550 | }, 551 | { 552 | "name": "interop-2023-mathfunctions", 553 | "labels": [ 554 | "interop-2023-mathfunctions" 555 | ] 556 | }, 557 | { 558 | "name": "interop-2023-mediaqueries", 559 | "labels": [ 560 | "interop-2023-mediaqueries" 561 | ] 562 | }, 563 | { 564 | "name": "interop-2023-modules", 565 | "labels": [ 566 | "interop-2023-modules" 567 | ] 568 | }, 569 | { 570 | "name": "interop-2023-motion", 571 | "labels": [ 572 | "interop-2023-motion" 573 | ] 574 | }, 575 | { 576 | "name": "interop-2023-offscreencanvas", 577 | "labels": [ 578 | "interop-2023-offscreencanvas" 579 | ] 580 | }, 581 | { 582 | "name": "interop-2022-scrolling", 583 | "labels": [ 584 | "interop-2022-scrolling" 585 | ] 586 | }, 587 | { 588 | "name": "interop-2021-transforms", 589 | "labels": [ 590 | "interop-2021-transforms" 591 | ] 592 | }, 593 | { 594 | "name": "interop-2023-webcodecs", 595 | "labels": [ 596 | "interop-2023-webcodecs" 597 | ] 598 | }, 599 | { 600 | "name": 
"interop-2023-webcompat", 601 | "labels": [ 602 | "interop-2023-webcompat" 603 | ] 604 | }, 605 | { 606 | "name": "interop-2023-webcomponents", 607 | "labels": [ 608 | "interop-2023-webcomponents" 609 | ] 610 | } 611 | ] 612 | }, 613 | "2025": { 614 | "categories": [ 615 | { 616 | "name": "interop-2025-anchor-positioning", 617 | "labels": [ 618 | "interop-2025-anchor-positioning" 619 | ] 620 | }, 621 | { 622 | "name": "interop-2025-core-web-vitals", 623 | "labels": [ 624 | "interop-2025-core-web-vitals" 625 | ] 626 | }, 627 | { 628 | "name": "interop-2025-modules", 629 | "labels": [ 630 | "interop-2025-modules" 631 | ] 632 | }, 633 | { 634 | "name": "interop-2025-navigation", 635 | "labels": [ 636 | "interop-2025-navigation" 637 | ] 638 | }, 639 | { 640 | "name": "interop-2025-backdrop-filter", 641 | "labels": [ 642 | "interop-2025-backdrop-filter" 643 | ] 644 | }, 645 | { 646 | "name": "interop-2023-events", 647 | "labels": [ 648 | "interop-2023-events" 649 | ] 650 | }, 651 | { 652 | "name": "interop-2024-layout", 653 | "labels": [ 654 | "interop-2021-flexbox", 655 | "interop-2021-grid", 656 | "interop-2023-flexbox", 657 | "interop-2023-grid", 658 | "interop-2022-subgrid" 659 | ] 660 | }, 661 | { 662 | "name": "interop-2025-remove-mutation-events", 663 | "labels": [ 664 | "interop-2025-remove-mutation-events" 665 | ] 666 | }, 667 | { 668 | "name": "interop-2025-scrollend", 669 | "labels": [ 670 | "interop-2025-scrollend" 671 | ] 672 | }, 673 | { 674 | "name": "interop-2025-storageaccess", 675 | "labels": [ 676 | "interop-2025-storageaccess" 677 | ] 678 | }, 679 | { 680 | "name": "interop-2025-details", 681 | "labels": [ 682 | "interop-2025-details" 683 | ] 684 | }, 685 | { 686 | "name": "interop-2025-textdecoration", 687 | "labels": [ 688 | "interop-2025-textdecoration" 689 | ] 690 | }, 691 | { 692 | "name": "interop-2025-scope", 693 | "labels": [ 694 | "interop-2025-scope" 695 | ] 696 | }, 697 | { 698 | "name": "interop-2025-urlpattern", 699 | "labels": [ 700 | "interop-2025-urlpattern" 701 | ] 702 | }, 703 | { 704 | "name": "interop-2025-view-transitions", 705 | "labels": [ 706 | "interop-2025-view-transitions" 707 | ] 708 | }, 709 | { 710 | "name": "interop-2025-webassembly", 711 | "labels": [ 712 | "interop-2025-webassembly" 713 | ] 714 | }, 715 | { 716 | "name": "interop-2025-writingmodes", 717 | "labels": [ 718 | "interop-2025-writingmodes" 719 | ] 720 | }, 721 | { 722 | "name": "interop-2025-webcompat", 723 | "labels": [ 724 | "interop-2025-webcompat" 725 | ] 726 | }, 727 | { 728 | "name": "interop-2025-webrtc", 729 | "labels": [ 730 | "interop-2025-webrtc" 731 | ] 732 | } 733 | ] 734 | } 735 | } 736 | -------------------------------------------------------------------------------- /interop-scoring/main.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-len */ 2 | 3 | 'use strict'; 4 | 5 | /** 6 | * Implements interop test results scoring as described in the Interop 2022 RFC: 7 | * https://github.com/web-platform-tests/rfcs/blob/master/rfcs/interop_2022.md#metrics 8 | * However, this script can also compute scores for later years as well. 9 | * 10 | * The results include an interoperability score which is calculated by aggregating 11 | * browser runs and determining the percentage of tests that pass in all observed browsers. 
12 | */ 13 | 14 | const fetch = require('node-fetch'); 15 | const flags = require('flags'); 16 | const fs = require('fs'); 17 | const Git = require('nodegit'); 18 | const lib = require('../lib'); 19 | const moment = require('moment'); 20 | const path = require('path'); 21 | // Read category data by year from JSON file. 22 | const interopData = require('./category-data.json'); 23 | 24 | flags.defineStringList('products', ['chrome', 'firefox', 'safari'], 25 | 'Products to include (comma-separated)'); 26 | flags.defineString('year', '2022', 'Interop year to calculate'); 27 | flags.defineString('from', '2022-01-01', 'Starting date (inclusive)'); 28 | flags.defineString('to', moment().format('YYYY-MM-DD'), 29 | 'Ending date (exclusive)'); 30 | flags.defineBoolean('experimental', false, 31 | 'Calculate metrics for experimental runs.'); 32 | flags.parse(); 33 | 34 | const ROOT_DIR = path.join(__dirname, '..'); 35 | 36 | // All non-OK harness statuses. Any non-OK harness status should be investigated 37 | // before being added to this list, so that we don't score tests in the wrong 38 | // way because of a test or infrastructure issue. 39 | const KNOWN_TEST_STATUSES = new Set([ 40 | // ERROR due to duplicate subtest name, fixed in https://github.com/web-platform-tests/wpt/pull/38387 41 | '/css/css-color/parsing/color-invalid-color-function.html', 42 | // TIMEOUT in Safari due to https://webkit.org/b/212201 43 | '/css/css-grid/grid-definition/grid-limits-001.html', 44 | // TIMEOUT in Firefox and Safari, all subtests present 45 | '/css/css-scroll-snap/input/keyboard.html', 46 | // ERROR in Firefox, TIMEOUT in Safari, all subtests failing in Chrome 47 | '/css/css-scroll-snap/input/snap-area-overflow-boundary.html', 48 | // TIMEOUT in Chrome with TIMEOUT subtests 49 | '/dom/events/Event-dispatch-click.html', 50 | // ERROR in Safari but linked bug is fixed 51 | '/html/browsers/browsing-the-web/navigating-across-documents/replace-before-load/form-requestsubmit-during-load.html', 52 | '/html/browsers/browsing-the-web/navigating-across-documents/replace-before-load/form-requestsubmit-during-pageshow.html', 53 | // TIMEOUT in Safari, but just a single subtest 54 | '/html/semantics/forms/form-submission-0/form-double-submit-multiple-targets.html', 55 | // TIMEOUT in Firefox and Safari, but just a single subtest 56 | '/html/semantics/forms/form-submission-0/form-double-submit-to-different-origin-frame.html', 57 | // TIMEOUT in Safari but all passing subtests due to https://bugs.webkit.org/show_bug.cgi?id=235407 58 | '/html/semantics/forms/form-submission-target/rel-base-target.html', 59 | '/html/semantics/forms/form-submission-target/rel-button-target.html', 60 | '/html/semantics/forms/form-submission-target/rel-form-target.html', 61 | '/html/semantics/forms/form-submission-target/rel-input-target.html', 62 | // ERROR in Firefox 95 and Safari 15.2, since fixed 63 | '/html/semantics/interactive-elements/the-dialog-element/dialog-showModal.html', 64 | // ERROR in Chrome 96, since fixed 65 | '/html/semantics/interactive-elements/the-dialog-element/modal-dialog-ancestor-is-inert.html', 66 | // TIMEOUT in Safari, but all subtests present 67 | '/html/semantics/forms/textfieldselection/select-event.html', 68 | '/html/semantics/forms/textfieldselection/selection-start-end.html', 69 | '/html/semantics/forms/textfieldselection/textfieldselection-setRangeText.html', 70 | '/html/semantics/forms/textfieldselection/textfieldselection-setSelectionRange.html', 71 | // TIMEOUT in Firefox 98, since fixed 72 | 
'/html/semantics/forms/the-input-element/image-click-form-data.html', 73 | // TIMEOUT in Safari, but all subtests present 74 | '/html/semantics/forms/the-input-element/range-restore-oninput-onchange-event.html', 75 | // TIMEOUT in STP 137, since fixed 76 | '/html/semantics/interactive-elements/the-dialog-element/backdrop-receives-element-events.html', 77 | // TIMEOUT for one run in Safari but has since run successfully. 78 | '/css/css-scroll-snap/snap-at-user-scroll-end.html', 79 | 80 | 81 | /** 82 | * The tests below have non-OK statuses that have not been investigated as of today. 83 | */ 84 | // interop-2023-contain 85 | '/css/css-contain/container-queries/nested-query-containers.html', 86 | '/css/css-contain/content-visibility/content-visibility-input-image.html', 87 | '/css/css-contain/content-visibility/content-visibility-031.html', 88 | '/css/css-contain/content-visibility/content-visibility-auto-state-changed.html', 89 | '/css/selectors/invalidation/fullscreen-pseudo-class-in-has.html', 90 | '/css/selectors/invalidation/modal-pseudo-class-in-has.html', 91 | '/css/selectors/invalidation/user-action-pseudo-classes-in-has.html', 92 | // interop-2023-modules 93 | '/html/semantics/scripting-1/the-script-element/import-assertions/empty-assertion-clause.html', 94 | '/html/semantics/scripting-1/the-script-element/import-assertions/unsupported-assertion.html', 95 | '/workers/modules/dedicated-worker-import-blob-url.any.html', 96 | '/workers/modules/dedicated-worker-import-blob-url.any.worker.html', 97 | '/workers/modules/dedicated-worker-import-data-url-cross-origin.html', 98 | '/workers/modules/dedicated-worker-import-data-url.any.html', 99 | '/workers/modules/dedicated-worker-import-data-url.any.worker.html', 100 | '/workers/modules/dedicated-worker-import-meta.html', 101 | '/workers/modules/dedicated-worker-import.any.html', 102 | '/workers/modules/dedicated-worker-import.any.worker.html', 103 | '/workers/modules/dedicated-worker-options-credentials.html', 104 | '/workers/modules/dedicated-worker-parse-error-failure.html', 105 | '/workers/modules/shared-worker-import-data-url-cross-origin.html', 106 | '/workers/modules/shared-worker-import-data-url.window.html', 107 | '/workers/modules/shared-worker-options-credentials.html', 108 | '/workers/modules/shared-worker-parse-error-failure.html', 109 | '/import-maps/acquiring/modulepreload-link-header.html', 110 | '/import-maps/acquiring/modulepreload.html', 111 | '/workers/modules/shared-worker-import-failure.html', 112 | '/import-maps/acquiring/dynamic-import.html', 113 | '/import-maps/acquiring/script-tag-inline.html', 114 | '/import-maps/acquiring/script-tag.html', 115 | '/import-maps/bare-specifiers.sub.html', 116 | // interop-2023-offscreencanvas 117 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeat.outside.html', 118 | '/html/canvas/offscreen/manual/filter/offscreencanvas.filter.w.html', 119 | '/html/canvas/offscreen/manual/convert-to-blob/offscreencanvas.convert.to.blob.w.html', 120 | '/html/canvas/offscreen/manual/draw-generic-family/2d.text.draw.generic.family.w.html', 121 | '/html/canvas/offscreen/manual/filter/offscreencanvas.filter.w.html', 122 | '/html/canvas/offscreen/manual/the-offscreen-canvas/offscreencanvas.commit.w.html', 123 | '/html/canvas/offscreen/manual/the-offscreen-canvas/offscreencanvas.transfer.to.imagebitmap.w.html', 124 | '/html/canvas/offscreen/manual/the-offscreen-canvas/offscreencanvas.transferrable.w.html', 125 | 
'/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeat.basic.html', 126 | '/html/canvas/offscreen/drawing-images-to-the-canvas/2d.drawImage.animated.poster.html', 127 | '/html/canvas/offscreen/compositing/2d.composite.globalAlpha.imagepattern.html', 128 | '/html/canvas/offscreen/compositing/2d.composite.uncovered.pattern.copy.html', 129 | '/html/canvas/offscreen/compositing/2d.composite.uncovered.pattern.destination-atop.html', 130 | '/html/canvas/offscreen/compositing/2d.composite.uncovered.pattern.destination-in.html', 131 | '/html/canvas/offscreen/compositing/2d.composite.uncovered.pattern.source-in.html', 132 | '/html/canvas/offscreen/compositing/2d.composite.uncovered.pattern.source-out.html', 133 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.basic.image.html', 134 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.crosscanvas.html', 135 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.norepeat.basic.html', 136 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.norepeat.coord1.html', 137 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.norepeat.coord2.html', 138 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.norepeat.coord3.html', 139 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.norepeat.outside.html', 140 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeat.coord3.html', 141 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeatx.coord1.html', 142 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeatx.outside.html', 143 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeaty.basic.html', 144 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeaty.coord1.html', 145 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeaty.outside.html', 146 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.repeat.empty.html', 147 | '/html/canvas/offscreen/shadows/2d.shadow.pattern.basic.html', 148 | '/html/canvas/offscreen/shadows/2d.shadow.pattern.transparent.2.html', 149 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeat.coord2.html', 150 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeatx.basic.html', 151 | '/html/canvas/offscreen/shadows/2d.shadow.pattern.alpha.html', 152 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.orientation.image.html', 153 | '/html/canvas/offscreen/fill-and-stroke-styles/2d.pattern.paint.repeat.coord1.html', 154 | '/html/canvas/offscreen/shadows/2d.shadow.pattern.transparent.1.html', 155 | // interop-2023-events 156 | '/uievents/mouse/cancel-mousedown-in-subframe.html', 157 | '/pointerevents/pointerevent_attributes_hoverable_pointers.html?mouse', 158 | '/pointerevents/pointerevent_attributes_nohover_pointers.html', 159 | '/pointerevents/pointerevent_disabled_form_control.html?mouse', 160 | '/html/user-activation/activation-trigger-pointerevent.html?mouse', 161 | '/pointerevents/pointerevent_movementxy.html?mouse', 162 | '/pointerevents/pointerevent_pointercapture_in_frame.html?mouse', 163 | '/uievents/mouse/attributes.html', 164 | // interop-2022-scrolling 165 | '/css/css-scroll-snap/snap-at-user-scroll-end.html', 166 | // interop-2023-webcodecs 167 | '/webcodecs/videoDecoder-codec-specific.https.any.html?av1', 168 | '/webcodecs/videoDecoder-codec-specific.https.any.html?h264_annexb', 169 | '/webcodecs/videoDecoder-codec-specific.https.any.html?h264_avc', 170 | 
'/webcodecs/videoDecoder-codec-specific.https.any.html?vp8', 171 | '/webcodecs/videoDecoder-codec-specific.https.any.html?vp9', 172 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?av1', 173 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?h264_annexb', 174 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?h264_avc', 175 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?vp8', 176 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?vp9', 177 | '/webcodecs/videoDecoder-codec-specific.https.any.worker.html?av1', 178 | '/webcodecs/videoFrame-construction.any.html', 179 | '/webcodecs/videoFrame-construction.crossOriginSource.sub.html', 180 | '/webcodecs/videoFrame-construction.window.html', 181 | '/webcodecs/videoFrame-serialization.crossAgentCluster.https.html', 182 | '/webcodecs/videoFrame-serialization.crossAgentCluster.https.html', 183 | '/webcodecs/temporal-svc-encoding.https.any.html?h264', 184 | '/webcodecs/temporal-svc-encoding.https.any.html?vp8', 185 | '/webcodecs/temporal-svc-encoding.https.any.html?vp9', 186 | '/webcodecs/temporal-svc-encoding.https.any.worker.html?h264', 187 | '/webcodecs/temporal-svc-encoding.https.any.worker.html?vp8', 188 | '/webcodecs/temporal-svc-encoding.https.any.worker.html?vp9', 189 | '/webcodecs/videoFrame-serialization.crossAgentCluster.https.html', 190 | '/webcodecs/videoFrame-serialization.crossAgentCluster.https.html', 191 | '/webcodecs/videoFrame-serialization.crossAgentCluster.https.html', 192 | '/webcodecs/full-cycle-test.https.any.html?av1', 193 | '/webcodecs/full-cycle-test.https.any.html?h264_annexb', 194 | '/webcodecs/full-cycle-test.https.any.html?h264_avc', 195 | '/webcodecs/full-cycle-test.https.any.html?vp9_p0', 196 | '/webcodecs/full-cycle-test.https.any.html?vp9_p2', 197 | '/webcodecs/full-cycle-test.https.any.worker.html?av1', 198 | '/webcodecs/full-cycle-test.https.any.worker.html?h264_annexb', 199 | '/webcodecs/full-cycle-test.https.any.worker.html?h264_avc', 200 | '/webcodecs/full-cycle-test.https.any.worker.html?vp9_p0', 201 | '/webcodecs/full-cycle-test.https.any.worker.html?vp9_p2', 202 | '/webcodecs/full-cycle-test.https.any.html?vp8', 203 | '/webcodecs/full-cycle-test.https.any.worker.html?vp8', 204 | // interop-2023-webcomponents 205 | '/shadow-dom/focus/focus-shadowhost-display-none.html', 206 | '/custom-elements/form-associated/ElementInternals-labels.html', 207 | '/custom-elements/form-associated/ElementInternals-setFormValue.html', 208 | '/custom-elements/form-associated/ElementInternals-validation.html', 209 | '/custom-elements/form-associated/form-disabled-callback.html', 210 | ]); 211 | 212 | 213 | // Calculate interop score (passing in all browsers) for a category 214 | // after tracking the category's scores for each browser. 215 | // 216 | // Subtests can be difficult to match accurately, as they can sometimes be missing 217 | // from runs entirely, and passing/failing cannot be discerned. The approach taken 218 | // here simply checks the percentage of passing subtests within a test 219 | // for each browser and adds the minimum percentage of passing subtests to the 220 | // interop score. Tests (not subtests) that are missing entirely from a 221 | // browser run are marked with an interop score of 0. 222 | // 223 | // This deviates from another approach that attempted to match each subtest result 224 | // together from all browser runs and calculate an interop score based on parallel 225 | // subtest results. 
Due to some missing subtests, this score skewed lower than the 226 | // current implementation. Neither is without its drawbacks, and the hope is that 227 | // the current approach will score runs more optimistically and avoid subtest matching. 228 | function aggregateInteropTestScores(testPassCounts, numBrowsers) { 229 | if (testPassCounts.size === 0) return 0; 230 | let aggregateScore = 0; 231 | for (const testResults of testPassCounts.values()) { 232 | let minTestScore = 1; 233 | // If a test result value is missing from any browser, the interop score is 0. 234 | if (testResults['subtestTotal'].length !== numBrowsers) { 235 | minTestScore = 0; 236 | } else { 237 | // Find the lowest score for the test among all browser runs. 238 | for (let i = 0; i < numBrowsers; i++) { 239 | const testScore = ( 240 | testResults['subtestPasses'][i] / testResults['subtestTotal'][i]); 241 | minTestScore = Math.min(minTestScore, testScore); 242 | } 243 | } 244 | // Add the minimum test score to the aggregate interop score. 245 | aggregateScore += Math.floor(1000 * minTestScore); 246 | } 247 | return Math.floor(aggregateScore / testPassCounts.size) || 0; 248 | } 249 | 250 | // Score a set of runs (independently) on a set of tests. The runs are presumed 251 | // to be aligned in some way (i.e. they were all run at the same WPT SHA). 252 | // 253 | // Returns an array of scores, which is the top-level score (integer 0-1000) for 254 | // each corresponding input run. 255 | // 256 | // To get the top-level score for a run, each test in that run that is present 257 | // in |allTestsSet| is examined. Each test is scored 0-1000 based on the 258 | // fraction of its subtests that pass, with rounding down so that 1000 means 259 | // all subtests pass. Reftests score either 0 or 1000. These test scores are 260 | // then summed and divided by the size of |allTestsSet|, again rounding down. 261 | // 262 | // This methodology has several consequences: 263 | // 264 | // 1. Individual tests do have a heavier weight than subtests. This could be 265 | // gamed, by splitting passing tests into multiple files rather than using 266 | // subtests (or conversely by combining failing tests into subtests in a 267 | // single file). 268 | // 269 | // 2. If |allTestsSet| is constant across runs *through time*, older runs may 270 | // not have entries for tests that were only added recently and will be penalized 271 | // for that. This is deliberate - see the comment block later in this 272 | // function for why. 273 | // 274 | // 3. We could show (on wpt.fyi) scores at both the test and category level as 275 | // a percentage with one decimal point, and what a user would see would be the 276 | // same numbers that go into the total score, with no hidden rounding error. 277 | // 278 | // 4. Because we round down twice, the score for a category can end up lower 279 | // than if we used rational numbers. 280 | function scoreRuns(runs, allTestsSet) { 281 | const scores = []; 282 | const testPassCounts = new Map(); 283 | const unexpectedNonOKTests = new Set(); 284 | 285 | try { 286 | for (const run of runs) { 287 | // Sum of the integer 0-1000 scores for each test. 288 | let score = 0; 289 | lib.results.walkTests(run.tree, (path, test, results) => { 290 | const testname = path + '/' + test; 291 | if (!allTestsSet.has(testname)) { 292 | return; 293 | } 294 | 295 | // TODO: Validate the data by checking that all statuses are recognized.
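        // For instance (illustrative numbers): a test with 3 of 4 subtests
        // passing contributes floor(1000 * 3 / 4) = 750 to `score` below; a
        // test without subtests (e.g. a reftest) contributes either 0 or 1000.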
296 | 297 | let subtestPasses = 0; 298 | let subtestTotal = 1; 299 | 300 | // Keep subtest data for every test in order to calculate interop scores. 301 | // A test entry is created the first time each test is encountered. 302 | if (!testPassCounts.has(testname)) { 303 | testPassCounts.set(testname, {}); 304 | testPassCounts.get(testname)['subtestPasses'] = []; 305 | testPassCounts.get(testname)['subtestTotal'] = []; 306 | } 307 | if ('subtests' in results) { 308 | if (results['status'] != 'OK' && !KNOWN_TEST_STATUSES.has(testname)) { 309 | unexpectedNonOKTests.add(testname); 310 | } 311 | subtestTotal = results['subtests'].length; 312 | for (const subtest of results['subtests']) { 313 | if (subtest['status'] == 'PASS') { 314 | subtestPasses += 1; 315 | } 316 | } 317 | } else { 318 | if (results['status'] == 'PASS') { 319 | subtestPasses = 1; 320 | } 321 | } 322 | 323 | // Add an entry to subtest passes and total for calculating the interop score. 324 | const subtestCounts = testPassCounts.get(testname); 325 | subtestCounts['subtestPasses'].push(subtestPasses); 326 | subtestCounts['subtestTotal'].push(subtestTotal); 327 | 328 | // A single test is scored 0-1000 based on how many of its subtests 329 | // pass, rounding down so that 1000 always means fully passing. 330 | score += Math.floor(1000 * subtestPasses / subtestTotal); 331 | }); 332 | // We always normalize against the number of tests we are looking for, 333 | // rather than the total number of tests we found. The trade-off is all 334 | // about new tests being added to the set. 335 | // 336 | // If a large chunk of tests are introduced at date X, and they fail in 337 | // some browser, then runs after date X look worse if you're only 338 | // counting total tests found - even though the tests would have failed 339 | // before date X as well. 340 | // 341 | // Conversely, if a large chunk of tests are introduced at date X, and 342 | // they pass in some browser, then runs after date X would get an 343 | // artificial boost in pass-rate due to this - even if the tests would 344 | // have passed before date X as well. 345 | // 346 | // We consider the former case worse than the latter, so optimize for it 347 | // by always comparing against the full test list. This does mean that 348 | // when tests are added to the set, previously generated data is no 349 | // longer valid and this script should be re-run for all dates. 350 | scores.push(Math.floor(score / allTestsSet.size)); 351 | } 352 | } catch (e) { 353 | e.message += `\n\tRuns: ${runs.map(r => r.id)}`; 354 | throw e; 355 | } 356 | 357 | // Log and tests with unexpected non-OK statuses. 358 | if (unexpectedNonOKTests.size > 0) { 359 | console.log('Unexpected non-OK status for tests:'); 360 | for (const testname of unexpectedNonOKTests.values()) { 361 | console.log(testname); 362 | } 363 | } 364 | // Calculate the interop scores that have been saved and add 365 | // the interop score to the end of the browsers' scores array. 366 | scores.push(aggregateInteropTestScores(testPassCounts, runs.length)); 367 | return scores; 368 | } 369 | 370 | async function scoreCategory(category, experimental, products, alignedRuns, 371 | testsSet) { 372 | // Score the test runs. 
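// Each date ends up mapping to {versions, scores}, where scores holds one 0-1000
// entry per product plus the category's interop score appended by scoreRuns().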
373 | const before = Date.now(); 374 | const dateToScores = new Map(); 375 | for (const [date, runs] of alignedRuns.entries()) { 376 | const versions = runs.map(run => run.browser_version); 377 | const scores = scoreRuns(runs, testsSet); 378 | dateToScores.set(date, {versions, scores}); 379 | } 380 | const after = Date.now(); 381 | console.log(`Done scoring (took ${after - before} ms)`); 382 | 383 | // Return dateToScores, so that our caller can calculate the summary across 384 | // multiple categories. 385 | return dateToScores; 386 | } 387 | 388 | async function main() { 389 | const year = (flags.isSet('year')) ? flags.get('year') : '2023'; 390 | if (!(year in interopData)) { 391 | throw new Error(`Categories not defined for year ${year}`); 392 | } 393 | const categories = interopData[year].categories; 394 | 395 | const products = flags.get('products'); 396 | const repo = await Git.Repository.open( 397 | path.join(ROOT_DIR, 'results-analysis-cache.git')); 398 | 399 | // First, grab aligned runs from the server for the dates that we are 400 | // interested in. 401 | const from = (flags.isSet('from')) ? moment(flags.get('from')) : moment(`${year}-01-01`); 402 | const to = (flags.isSet('to')) ? moment(flags.get('to')) : moment(); 403 | 404 | const experimental = flags.get('experimental'); 405 | const alignedRuns = await lib.runs.fetchAlignedRunsFromServer( 406 | products, from, to, experimental); 407 | 408 | // Verify that we have data for the fetched runs in the results-analysis-cache 409 | // repo. 410 | console.log('Getting local set of run ids from repo'); 411 | let before = Date.now(); 412 | const localRunIds = await lib.results.getLocalRunIds(repo); 413 | let after = Date.now(); 414 | console.log(`Found ${localRunIds.size} ids (took ${after - before} ms)`); 415 | 416 | let hadErrors = false; 417 | for (const [date, runs] of alignedRuns.entries()) { 418 | for (const run of runs) { 419 | if (!localRunIds.has(run.id)) { 420 | // If you see this, you probably need to run git-write.js or just update 421 | // your results-analysis-cache.git repo; see the README.md. 422 | console.error(`Run ${run.id} missing from local git repo (${date})`); 423 | hadErrors = true; 424 | } 425 | } 426 | } 427 | if (hadErrors) { 428 | throw new Error('Missing data for some runs (see errors logged above). ' + 429 | 'Try running "git fetch --all --tags" in results-analysis-cache/'); 430 | } 431 | 432 | // Load the test result trees into memory; creates a recursive tree 433 | // structure per run: tree = { trees: {...}, tests: {...} }. Each 'tree' represents a 434 | // directory, each 'test' is the results from a given test file. 435 | console.log('Iterating over all runs, loading test results'); 436 | before = Date.now(); 437 | for (const runs of alignedRuns.values()) { 438 | for (const run of runs) { 439 | // Just in case someone ever adds a 'tree' field to the JSON.
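// getGitTree() resolves the tag refs/tags/run/<run.id>/results and reuses cached
// subtrees/results keyed by git object id, so identical results across runs share
// the same objects in memory (see lib/results.js).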
440 | if (run.tree) { 441 | throw new Error('Run JSON contains "tree" field; code needs changed.'); 442 | } 443 | run.tree = await lib.results.getGitTree(repo, run); 444 | } 445 | } 446 | after = Date.now(); 447 | console.log(`Loading ${alignedRuns.size} sets of runs took ` + 448 | `${after - before} ms`); 449 | 450 | const dateToScoresMaps = new Map(); 451 | 452 | // Map from labels to tests (includes) 453 | const labeledTests = new Map(); 454 | const url = 'https://wpt.fyi/api/metadata?includeTestLevel=true&product=chrome'; 455 | const response = await fetch(url); 456 | const metadata = await response.json(); 457 | for (const [test, metadataList] of Object.entries(metadata)) { 458 | for (const {label} of metadataList) { 459 | if (label) { 460 | if (!labeledTests.has(label)) { 461 | labeledTests.set(label, new Set()); 462 | } 463 | labeledTests.get(label).add(test); 464 | } 465 | } 466 | } 467 | // category is an object with "name" and "labels" props. 468 | for (const category of categories) { 469 | console.log(`Scoring runs for ${category.name}`); 470 | const testsSet = new Set(); 471 | // We aggregate all the tests with the labels defined in the category. 472 | for (const label of category.labels) { 473 | const labeledTestsSet = labeledTests.get(label); 474 | if (!labeledTestsSet || !labeledTestsSet.size) { 475 | throw new Error(`No tests labeled for ${label}`); 476 | } 477 | // Keep a unique set of tests associated with the category. 478 | labeledTestsSet.forEach(test => testsSet.add(test)); 479 | } 480 | const dateToScores = await scoreCategory(category, experimental, products, 481 | alignedRuns, testsSet); 482 | // Store the entire dateToScores for producing the unified CSV later. 483 | dateToScoresMaps.set(category.name, dateToScores); 484 | } 485 | 486 | // TODO: Once the other score CSVs are no longer used, we can push 487 | // some of this logic into scoreCategory and simplify things. 488 | let unifiedCsv = 'date'; 489 | for (const product of products) { 490 | const categoryLabels = categories.map(c => `${product}-${c.name}`); 491 | unifiedCsv += `,${product}-version,${categoryLabels.join()}`; 492 | } 493 | // Add the interop category headers. 494 | // An arbitrary interop version header is kept for ease of parsing on the interop dashboard. 495 | unifiedCsv += `,interop-version,${categories.map(c => `interop-${c.name}`)}`; 496 | unifiedCsv += '\n'; 497 | 498 | // We know that all dateToScoresMaps have the same dates (as they come from 499 | // the same runs), so we can just iterate the keys from the first. 500 | for (const date of dateToScoresMaps.get(categories[0].name).keys()) { 501 | let csvLine = [date.substr(0, 10)]; 502 | // This is essentially an inversion loop; we have the data mapped by 503 | // individual categories, but we need it mapped by product. 504 | for (let browserIdx = 0; browserIdx < products.length; browserIdx++) { 505 | let version; 506 | const productScores = []; 507 | for (const category of categories) { 508 | const {versions, scores} = dateToScoresMaps.get(category.name).get(date); 509 | const score = scores[browserIdx]; 510 | productScores.push(score); 511 | // The versions should all be the same, so we just grab the latest one. 512 | version = versions[browserIdx]; 513 | } 514 | csvLine.push(version); 515 | csvLine = csvLine.concat(productScores); 516 | } 517 | // Add the interop scores for each category. 
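// Each finished line is: date, then for every product its version followed by its
// per-category scores, then '-' (the placeholder interop "version") and the
// per-category interop scores.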
518 | csvLine.push('-'); 519 | for (const category of categories) { 520 | const scoreInfo = dateToScoresMaps.get(category.name).get(date); 521 | const categoryInteropScore = scoreInfo.scores[products.length]; 522 | csvLine.push(categoryInteropScore); 523 | } 524 | unifiedCsv += `${csvLine.join()}\n`; 525 | } 526 | 527 | // Use "-v2" suffix to differentiate from the old csv formats. 528 | const csvFilename = experimental ? 529 | `interop-${year}-experimental-v2.csv` : `interop-${year}-stable-v2.csv`; 530 | await fs.promises.writeFile(csvFilename, unifiedCsv, 'utf-8'); 531 | console.log(`Wrote scores to ${csvFilename}`); 532 | } 533 | 534 | main().catch(reason => { 535 | console.error(reason); 536 | process.exit(1); 537 | }); 538 | -------------------------------------------------------------------------------- /lib/browser-specific.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-len */ 2 | 3 | 'use strict'; 4 | 5 | /** 6 | * Implements functionality to report on how many WPT tests fail only on one 7 | * browser (aka browser-specific failures). 8 | */ 9 | 10 | const TEST_PASS_STATUSES = ['PASS']; 11 | const TEST_FAIL_STATUSES = ['FAIL', 'ERROR', 'TIMEOUT', 'CRASH']; 12 | // An empty string has been seen for some tests; see 13 | // https://github.com/web-platform-tests/wpt/issues/22306 14 | const TEST_NEUTRAL_STATUSES = ['PRECONDITION_FAILED', 'SKIP', '']; 15 | const KNOWN_TEST_STATUSES = TEST_PASS_STATUSES.concat( 16 | TEST_FAIL_STATUSES, TEST_NEUTRAL_STATUSES); 17 | 18 | const SUBTEST_PASS_STATUSES = ['PASS']; 19 | const SUBTEST_FAIL_STATUSES = ['FAIL', 'ERROR', 'TIMEOUT', 'NOTRUN']; 20 | const SUBTEST_NEUTRAL_STATUSES = ['PRECONDITION_FAILED', 'SKIP']; 21 | const KNOWN_SUBTEST_STATUSES = SUBTEST_PASS_STATUSES.concat( 22 | SUBTEST_FAIL_STATUSES, SUBTEST_NEUTRAL_STATUSES); 23 | 24 | // Across runs of WPT, there is a lot of duplication of results. Since we store 25 | // the results in a Git repository, lib/results.js is able to automatically 26 | // de-duplicate identical sub-trees (directories) and blobs (test files), and 27 | // assign them unique identitifers. We can then use those unique identifiers to 28 | // cache score results for sets of sub-trees and tests that we see when scoring 29 | // many browser runs. 30 | // 31 | // These caches map from collections of input sub-trees or input tests, to the 32 | // score array created for them. Note that order is important; if we see the 33 | // sub-trees '1-2-3' and cache a score array [a, b, c] for them, we *cannot* 34 | // re-use [a, b, c] if we later see '3-2-1' (the array would be [c, b, a] 35 | // then!). In theory one could do some clever work to re-order the score array 36 | // in that case, but it's overkill; the naive caches here reduce the processing 37 | // time from ~minutes per year of runs to ~3s a year on my desktop. 38 | const treesScoreCache = new Map; 39 | const testsScoreCache = new Map; 40 | 41 | // A helper class providing an iterator-like interface to either an Object with 42 | // enumerable properties, or an Array. Iterates in a sorted order determined by 43 | // |comparatorFunc|. 44 | // 45 | // Note that this class mutates the input |arrOrObject|. 
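//
// A minimal usage sketch, assuming a plain object and a simple key comparator:
//
//   const comparator = (k1, k2) => (k1 > k2) - (k1 < k2);
//   const it = new IteratorHelper({b: 2, a: 1}, comparator);
//   while (it.hasCurrent()) {
//     console.log(it.key(), it.value()); // logs 'a 1', then 'b 2'
//     it.moveNext();
//   }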
46 | class IteratorHelper { 47 | constructor(arrOrObject, comparatorFunc) { 48 | this.currentIndex = 0; 49 | this.values = arrOrObject; 50 | this.comparator = comparatorFunc; 51 | 52 | if (Array.isArray(this.values)) { 53 | this.keys = null; 54 | this.maxIndex = this.values.length - 1; 55 | this.values.sort(comparatorFunc); 56 | } else { 57 | this.keys = Object.keys(this.values); 58 | this.maxIndex = this.keys.length - 1; 59 | this.keys.sort(comparatorFunc); 60 | } 61 | } 62 | 63 | hasCurrent() { 64 | return this.currentIndex <= this.maxIndex; 65 | } 66 | 67 | // Advances the iterator to the next element of the collection. Returns true 68 | // if the iterator was successfully advanced, false if it has passed the end 69 | // of the collection. 70 | moveNext() { 71 | if (this.currentIndex > this.maxIndex) { 72 | return false; 73 | } 74 | this.currentIndex++; 75 | return true; 76 | } 77 | 78 | key() { 79 | if (this.keys === null) { 80 | throw new Error('Cannot get key of an Array iteration'); 81 | } 82 | return this.keys[this.currentIndex]; 83 | } 84 | 85 | value() { 86 | if (this.keys === null) { 87 | return this.values[this.currentIndex]; 88 | } 89 | return this.values[this.key()]; 90 | } 91 | } 92 | 93 | function findSmallestNameAndIndex(browserSubtests) { 94 | const comparator = browserSubtests[0].comparator; 95 | let smallest = null; 96 | let smallestIdx = null; 97 | for (let i = 0; i < browserSubtests.length; i++) { 98 | if (!browserSubtests[i].hasCurrent()) { 99 | continue; 100 | } 101 | if (smallest == null || 102 | comparator(browserSubtests[i].value(), smallest) < 0) { 103 | smallest = browserSubtests[i].value(); 104 | smallestIdx = i; 105 | } 106 | } 107 | 108 | return [smallest.name, smallestIdx]; 109 | } 110 | 111 | // Scores a WPT test that contains subtests, returning an array of scores for 112 | // each browser in the same order as |browserTests|. 113 | // 114 | // For each browser, each subtest is scored either 0 or 1 based on whether it is 115 | // a browser-specific failure. We then normalize the subtest scores such that 116 | // the worst possible score for a given test would be '1', to avoid tests with 117 | // thousands of subtests from overwhelming the results. 118 | function scoreSubtests(browserSubtests) { 119 | // To avoid errors from summing small floats, we do a full count of 120 | // browser-specific subtest failures first, then divide by the number of 121 | // subtests later to get the score (see the note on normalization above). 122 | let denominator = 0; 123 | let prevName = null; 124 | const counts = new Array(browserSubtests.length).fill(0); 125 | 126 | while (browserSubtests.some(subtests => subtests.hasCurrent())) { 127 | const [name] = findSmallestNameAndIndex(browserSubtests); 128 | const onSameSubtest = browserSubtests.filter(s => s.hasCurrent() && 129 | s.value().name == name); 130 | 131 | if (name === prevName) { 132 | // There actually (rarely) exist distinct subtests with the same name in 133 | // the data, usually because of unprintable characters. This can influence 134 | // the result as we may mismatch results (i.e. if some browser has results 135 | // for one duplicate-named subtest but not another). 136 | // 137 | // We avoid scoring another subtest result as otherwise we end up with the 138 | // denominator being greater than the number of unique subtests, which is 139 | // a very surprising outcome. Thus, in this branch, do nothing. 
140 | } else if (onSameSubtest.length < browserSubtests.length) { 141 | // At this point at least one browser is missing a test that at least 142 | // one other browser has. This could be a browser-specific failure, if 143 | // exactly N-1 browsers have a passing state for that test (as we are 144 | // treating missing as a failure state). 145 | denominator += 1; 146 | if (onSameSubtest.length == browserSubtests.length - 1 && 147 | onSameSubtest.every(s => TEST_PASS_STATUSES.includes( 148 | s.value().status))) { 149 | for (let i = 0; i < browserSubtests.length; i++) { 150 | if (!browserSubtests[i].hasCurrent() || 151 | browserSubtests[i].value().name != name) { 152 | counts[i] += 1; 153 | break; 154 | } 155 | } 156 | } 157 | } else { 158 | // The iterators are all aligned at the same subtest, so score it! 159 | denominator += 1; 160 | 161 | let failed = []; 162 | for (let i = 0; i < browserSubtests.length; i++) { 163 | const status = browserSubtests[i].value().status; 164 | if (!KNOWN_SUBTEST_STATUSES.includes(status)) { 165 | throw new Error(`Unknown subtest status for ` + 166 | `'${browserSubtests[i].name}': '${status}'`); 167 | } 168 | 169 | // A 'neutral' subtest status means that a browser has a result which is 170 | // not a failure, but which is also not a proper pass (one such example 171 | // is SKIP). If any browser has such a status, no browser can be a 172 | // browser-specific failure (since we don't know what the 'real' result 173 | // for the neutral-status browser would be). 174 | if (SUBTEST_NEUTRAL_STATUSES.includes(status)) { 175 | failed = []; 176 | break; 177 | } 178 | 179 | if (SUBTEST_FAIL_STATUSES.includes(status)) { 180 | failed.push(i); 181 | } 182 | } 183 | if (failed.length == 1) { 184 | counts[failed[0]] += 1; 185 | } 186 | } 187 | 188 | prevName = name; 189 | onSameSubtest.forEach(subtest => subtest.moveNext()); 190 | } 191 | 192 | if (denominator == 0) { 193 | return new Array(browserSubtests.length).fill(0); 194 | } 195 | return counts.map(count => count / denominator); 196 | } 197 | 198 | function scoreTopLevelTest(browserTests) { 199 | let failed = []; 200 | for (let i = 0; i < browserTests.length; i++) { 201 | const status = browserTests[i].status; 202 | if (!KNOWN_TEST_STATUSES.includes(status)) { 203 | throw new Error(`Unknown test status: '${status}'`); 204 | } 205 | 206 | // A 'neutral' test status means that a browser has a result which is not a 207 | // failure, but which is also not a proper pass (one such example is SKIP). 208 | // If any browser has such a status, no browser can be a browser-specific 209 | // failure (since we don't know what the 'real' result for the neutral 210 | // status browser would be). 211 | if (TEST_NEUTRAL_STATUSES.includes(status)) { 212 | failed = []; 213 | break; 214 | } 215 | 216 | if (TEST_FAIL_STATUSES.includes(status)) { 217 | failed.push(i); 218 | } 219 | } 220 | 221 | const scores = new Array(browserTests.length).fill(0); 222 | if (failed.length == 1) { 223 | scores[failed[0]] += 1; 224 | } 225 | return scores; 226 | } 227 | 228 | // Scores a particular WPT test for a set of browsers, returning an array of 229 | // scores for each browser in the same order as |browserTests|. 230 | function scoreTest(browserTests) { 231 | const cacheKey = browserTests.map(test => test.id).join('-'); 232 | if (testsScoreCache.has(cacheKey)) { 233 | return testsScoreCache.get(cacheKey); 234 | } 235 | 236 | let scores = new Array(browserTests.length).fill(0); 237 | 238 | // Some WPT tests contain multiple 'subtests' (e.g. 
most testharness.js 239 | // tests), whilst others are just a single conceptual test (e.g. reftests). 240 | // 241 | // Tests without subtests are scored as a simple 0-or-1 for each failing 242 | // browser (0 if any other browser also fails, 1 if no other browser fails). 243 | // When there are subtests, we do a similar calculation per-subtest, but 244 | // normalize the results by the number of subtests in the test. This stops 245 | // tests with thousands of subtests from dominating the results. 246 | if (browserTests.every(t => !t.subtests || t.subtests.length == 0)) { 247 | scores = scoreTopLevelTest(browserTests); 248 | } else if (browserTests.every(t => t.subtests && t.subtests.length > 0)) { 249 | const comparator = (s1, s2) => (s1.name > s2.name) - (s1.name < s2.name); 250 | scores = scoreSubtests(browserTests.map( 251 | tests => new IteratorHelper(tests.subtests, comparator))); 252 | } 253 | 254 | testsScoreCache.set(cacheKey, scores); 255 | return scores; 256 | } 257 | 258 | // Walks a set of trees, one per browser, scoring them for browser-specific 259 | // failures of tests in the trees. 260 | function walkTrees(browserTrees, currentPath) { 261 | const cacheKey = browserTrees.map(tree => tree.id).join('-'); 262 | if (treesScoreCache.has(cacheKey)) { 263 | return treesScoreCache.get(cacheKey); 264 | } 265 | 266 | let scores = new Array(browserTrees.length).fill(0); 267 | 268 | // Sorting comparator to sort Object keys alphabetically. 269 | const keyComparator = (k1, k2) => (k1 > k2) - (k1 < k2); 270 | 271 | // First deal with any tests that are at this level of the tree. 272 | const browserTests = browserTrees.map( 273 | tree => new IteratorHelper(tree.tests, keyComparator)); 274 | // As we are dealing with the intersection of tests between browsers, we are 275 | // done once we have exhausted all tests from some browser (leftover tests in 276 | // other browsers don't matter). 277 | while (browserTests.every(tests => tests.hasCurrent())) { 278 | // If we are looking at the same test across all browsers, but they aren't 279 | // the exact same objects, they need to be scored! 280 | const testName = browserTests[0].key(); 281 | const fullTestName = `${currentPath}/${testName}`; 282 | if (browserTests.every(t => t.key() === testName) && 283 | !browserTests.every(t => t.value() === browserTests[0].value())) { 284 | try { 285 | const testScores = scoreTest( 286 | browserTests.map(t => t.value())); 287 | scores = scores.map((v, i) => v + testScores[i]); 288 | browserTests.forEach(t => t.moveNext()); 289 | continue; 290 | } catch (e) { 291 | e.message += `\n\tTest: ${fullTestName}`; 292 | throw e; 293 | } 294 | } 295 | 296 | // Our iterators are not pointing at the same test; find the earliest 297 | // iterator and move it forward. 298 | let smallestKey = browserTests[0].key(); 299 | let smallestIdx = 0; 300 | for (let i = 1; i < browserTests.length; i++) { 301 | if (keyComparator(browserTests[i].key(), smallestKey) < 0) { 302 | smallestKey = browserTests[i].key(); 303 | smallestIdx = i; 304 | } 305 | } 306 | browserTests[smallestIdx].moveNext(); 307 | } 308 | 309 | // Now recurse into subtrees. 
310 | const browserSubtrees = browserTrees.map( 311 | tree => new IteratorHelper(tree.trees, keyComparator)); 312 | while (browserSubtrees.every(subtree => subtree.hasCurrent())) { 313 | // If the subtrees are all the same object (which happens due to the caching 314 | // in lib/results.js), we can just skip them; it is impossible for there to 315 | // be browser-specific failures in the subtree. 316 | if (browserSubtrees.every(s => s.value() === browserSubtrees[0].value())) { 317 | browserSubtrees.forEach(s => s.moveNext()); 318 | continue; 319 | } 320 | 321 | // If all the iterators are pointing at the same directory (subtree), then 322 | // we should recurse into those subtrees to score them. 323 | const dirName = browserSubtrees[0].key(); 324 | if (browserSubtrees.every(s => s.key() === dirName)) { 325 | const subtreeScores = walkTrees( 326 | browserSubtrees.map(s => s.value()), 327 | `${currentPath}/${dirName}`); 328 | scores = scores.map((v, i) => v + subtreeScores[i]); 329 | browserSubtrees.forEach(s => s.moveNext()); 330 | continue; 331 | } 332 | 333 | // Our iterators are not pointing at the same subtree; find the earliest 334 | // iterator and move it forward. 335 | let smallestKey = browserSubtrees[0].key(); 336 | let smallestIdx = 0; 337 | for (let i = 1; i < browserSubtrees.length; i++) { 338 | if (keyComparator(browserSubtrees[i].key(), smallestKey) < 0) { 339 | smallestKey = browserSubtrees[i].key(); 340 | smallestIdx = i; 341 | } 342 | } 343 | browserSubtrees[smallestIdx].moveNext(); 344 | } 345 | 346 | treesScoreCache.set(cacheKey, scores); 347 | 348 | return scores; 349 | } 350 | 351 | // Produces a 'score' of browser-specific failures for a given set of runs from 352 | // different products on the same WPT codebase. The word 'score' is used instead 353 | // of count as we normalize the counts of subtests. 354 | // 355 | // runs: an array of run objects, where each run has the form: 356 | // {browser_name: "foo", tree: } 357 | // 358 | // expectedBrowsers: the set of browsers that should be (exactly) represented in 359 | // runs. If a browser is missing, an exception will be thrown. 360 | // 361 | // Returns a map from product name to score. 362 | function scoreBrowserSpecificFailures(runs, expectedBrowsers) { 363 | // First, verify that the expected browsers are seen in |runs|. 364 | const seenBrowsers = new Set(); 365 | for (const run of runs) { 366 | const browserName = run.browser_name; 367 | if (!expectedBrowsers.has(browserName)) { 368 | throw new Error(`Unexpected browser found in runs: ${browserName}`); 369 | } 370 | if (seenBrowsers.has(browserName)) { 371 | throw new Error(`${browserName} has multiple entries in runs`); 372 | } 373 | seenBrowsers.add(browserName); 374 | } 375 | // Browsers can only be added to seenBrowsers if they were already in 376 | // expectedBrowsers (see above), so the only remaining possible error is a 377 | // missing browser in the runs. 378 | if (seenBrowsers.size != expectedBrowsers.size) { 379 | const difference = [...expectedBrowsers].filter(x => !seenBrowsers.has(x)); 380 | throw new Error(`Missing runs for browsers: ${difference.join(',')}`); 381 | } 382 | 383 | 384 | // Now do the actual walk to score the runs. 
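// walkTrees() returns one score per run, in the same order as |runs|; e.g. runs
// ordered [chrome, firefox, safari] with (illustrative) scores [12.5, 3, 7] become
// Map { 'chrome' => 12.5, 'firefox' => 3, 'safari' => 7 } below.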
385 | const scores = walkTrees(runs.map(run => run.tree), ''); 386 | return new Map(scores.map((score, i) => [runs[i].browser_name, score])); 387 | } 388 | 389 | module.exports = {scoreBrowserSpecificFailures}; 390 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const browserSpecific = require('./browser-specific'); 4 | const results = require('./results'); 5 | const runs = require('./runs'); 6 | 7 | module.exports = {browserSpecific, results, runs}; 8 | -------------------------------------------------------------------------------- /lib/results.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-len */ 2 | 3 | 'use strict'; 4 | 5 | /** 6 | * Utility functions for interacting with WPT run results encoded in a git 7 | * repository (results-analysis-cache). 8 | * 9 | * The results-analysis-cache repository stores results from WPT runs as individual 10 | * orphan commits, each pointed to by a tag. A given commit (aka run) stores the 11 | * results in expanded directory form, where each WPT test has a results JSON 12 | * file stored at /root/path/to/test/test_name.json. 13 | * 14 | * Storing runs this way allows us to use git's built-in object deduplication to 15 | * compress results whilst still having easy access to them. We can keep that 16 | * compression even when loading results into memory, by having a cache of 17 | * trees/results keyed off of the git unique object ids. 18 | * 19 | * When a run is loaded into memory, we store it as a tree, where each node 20 | * represents a directory. A node has a (possibly empty) map of directory name 21 | * to child node and a (possibly empty) map of test name (for tests in the 22 | * node's directory) to results JSON. 23 | * 24 | * The results JSON for a particular test looks something like: 25 | * 26 | * {status: OK, subtests: [{ name: "Foo", status: "PASS" }, ...]} 27 | * 28 | * The 'subtests' array may be missing, if the test is a reftest or single page 29 | * test[0]. In that case, the top level status will be a PASS/FAIL/TIMEOUT/etc 30 | * rather than a harness status. 31 | * 32 | * [0]: https://web-platform-tests.org/writing-tests/testharness-api.html#single-page-tests 33 | */ 34 | 35 | const Git = require('nodegit'); 36 | 37 | // Map from object id to { "trees": { ... }, "tests": { ... } } objects. 38 | const treeCache = {}; 39 | 40 | // Map from object id to { "status": "OK", ... } objects. 41 | const testCache = {}; 42 | 43 | // Convert a git object id to a key in the above maps. 44 | function oidToKey(oid) { 45 | return oid.tostrS(); 46 | } 47 | 48 | // To make checking for identical trees and tests easier for consumers, we 49 | // assign them each unique ids. 
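//
// Concretely, a loaded run ends up looking roughly like this (ids illustrative):
//
//   {
//     id: 1,
//     trees: {
//       css: {id: 2, trees: {...}, tests: {...}},
//     },
//     tests: {
//       'example.html': {id: 3, status: 'OK', subtests: [{name: 'Foo', status: 'PASS'}]},
//     },
//   }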
50 | 51 | const newTreeId = (() => { 52 | let id = 0; 53 | return () => { 54 | return ++id; 55 | }; 56 | })(); 57 | 58 | const newTestId = (() => { 59 | let id = 0; 60 | return () => { 61 | return ++id; 62 | }; 63 | })(); 64 | 65 | async function readResults(entry) { 66 | if (!entry.isBlob()) { 67 | throw new TypeError('y no Blob?'); 68 | } 69 | 70 | const key = oidToKey(entry.id()); 71 | 72 | const cachedTest = testCache[key]; 73 | if (cachedTest) { 74 | return cachedTest; 75 | } 76 | 77 | const blob = await entry.getBlob(); 78 | const buffer = blob.content(); 79 | const results = JSON.parse(buffer); 80 | 81 | if (results.id !== undefined) { 82 | throw new Error('Results JSON already has an "id" field; code needs changed'); 83 | } 84 | results.id = newTestId(); 85 | 86 | testCache[key] = results; 87 | return results; 88 | } 89 | 90 | async function readTree(treeOrEntry) { 91 | let tree; let entry; let oid; 92 | if (treeOrEntry instanceof Git.Tree) { 93 | tree = treeOrEntry; 94 | oid = tree.id(); 95 | } else { 96 | if (!(treeOrEntry instanceof Git.TreeEntry) || !treeOrEntry.isTree()) { 97 | throw new TypeError('y no Tree or TreeEntry?'); 98 | } 99 | entry = treeOrEntry; 100 | oid = entry.id(); 101 | } 102 | 103 | const key = oidToKey(oid); 104 | 105 | const cachedTree = treeCache[key]; 106 | if (cachedTree) { 107 | return cachedTree; 108 | } 109 | 110 | const newTree = { 111 | id: newTreeId(), 112 | trees: {}, 113 | tests: {}, 114 | }; 115 | 116 | if (!tree) { 117 | tree = await entry.getTree(); 118 | } 119 | 120 | for (const entry of tree.entries()) { 121 | if (entry.isTree()) { 122 | newTree.trees[entry.name()] = await readTree(entry); 123 | } else if (entry.isBlob()) { 124 | let name = entry.name(); 125 | if (!name.endsWith('.json')) { 126 | throw new Error('y not .json?'); 127 | } 128 | name = decodeURIComponent(name.substr(0, name.length - 5)); 129 | newTree.tests[name] = await readResults(entry); 130 | } else { 131 | throw new TypeError('y not tree or blob?'); 132 | } 133 | } 134 | 135 | treeCache[key] = newTree; 136 | return newTree; 137 | } 138 | 139 | // Read the tree for a given run fully into memory, converting it into our 140 | // internal representation (see module documentation). 141 | async function getGitTree(repo, run) { 142 | const commit = await repo.getReferenceCommit(`refs/tags/run/${run.id}/results`); 143 | const tree = await commit.getTree(); 144 | 145 | return readTree(tree); 146 | } 147 | 148 | // Return a set of run ids, determined from the tags of the git repo. 149 | async function getLocalRunIds(repo) { 150 | const refs = await repo.getReferences(); 151 | const tags = refs.filter(ref => ref.isTag()); 152 | tags.sort(); 153 | 154 | return new Set(tags.map(tag => { 155 | // format is refs/tags/run/6286849043595264/results 156 | const parts = tag.toString().split('/'); 157 | return Number(parts[3]); 158 | })); 159 | } 160 | 161 | // Walks an input tree in depth-first order, calling the visitor function on 162 | // each test in the tree. The visitor function should be of the form: 163 | // visitor(path, test_name, test_results) 164 | // 165 | // Where test_results is an object as described in the module documentation. 
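//
// Example, mirroring how interop-scoring/main.js consumes it:
//
//   walkTests(run.tree, (path, test, results) => {
//     console.log(`${path}/${test}: ${results.status}`);
//   });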
166 | function walkTests(tree, visitor, path='') { 167 | for (const [dir, subtree] of Object.entries(tree.trees)) { 168 | walkTests(subtree, visitor, `${path}/${dir}`); 169 | } 170 | 171 | for (const [name, results] of Object.entries(tree.tests)) { 172 | visitor(path, name, results); 173 | } 174 | } 175 | 176 | // treeCache, testCache exposed only for git-query.js to report memory stats. 177 | module.exports = {getGitTree, getLocalRunIds, walkTests, treeCache, testCache}; 178 | -------------------------------------------------------------------------------- /lib/runs.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const fetch = require('node-fetch'); 4 | const fs = require('fs'); 5 | const moment = require('moment'); 6 | const path = require('path'); 7 | const {advanceDateToSkipBadDataIfNecessary} = require('../bad-ranges'); 8 | 9 | const RUNS_API = 'https://wpt.fyi/api/runs'; 10 | 11 | function apiURL(options = {}) { 12 | const url = new URL(RUNS_API); 13 | for (let [name, value] of Object.entries(options)) { 14 | if (Array.isArray(value)) { 15 | value = value.join(','); 16 | } 17 | url.searchParams.set(name, value); 18 | } 19 | return url; 20 | } 21 | 22 | async function get(options) { 23 | const url = apiURL(options); 24 | // console.log(`Fetching ${url}`); 25 | return fetch(url).then(r => r.json()); 26 | } 27 | 28 | async function getAll(options) { 29 | const runs = []; 30 | for await (const run of getIterator(options)) { 31 | runs.push(run); 32 | } 33 | 34 | // Sort runs by start time, most recent first. This is the order that the API 35 | // uses as well, but due to pagination it will not be strictly sorted. 36 | runs.sort((a, b) => { 37 | return Date.parse(b.time_start) - Date.parse(a.time_start); 38 | }); 39 | 40 | return runs; 41 | } 42 | 43 | async function* getIterator(options) { 44 | options = Object.assign({'max-count': 500}, options); 45 | 46 | let url = apiURL(options); 47 | let previousUrl = null; 48 | while (true) { 49 | const r = await fetch(url); 50 | // wpt.fyi API returns 404 with an empty result set 51 | if (!r.ok && r.status !== 404) { 52 | let msg = `non-OK, non-404 fetch status ${r.status} when fetching ${url}`; 53 | if (previousUrl) { 54 | msg += ` (previous url was ${previousUrl})`; 55 | } 56 | throw new Error(msg); 57 | } 58 | 59 | const runs = await r.json(); 60 | for (const run of runs) { 61 | yield run; 62 | } 63 | const token = r.headers.get('wpt-next-page'); 64 | if (!token) { 65 | break; 66 | } 67 | previousUrl = url; 68 | url = new URL(RUNS_API); 69 | url.searchParams.set('page', token); 70 | } 71 | } 72 | 73 | 74 | // Fetches aligned runs from the wpt.fyi server, between the |from| and |to| 75 | // dates. If |experimental| is true fetch experimental runs, else stable runs. 76 | // Returns a map of date to list of runs for that date (one per product) 77 | // 78 | // TODO: Known problem: there are periods of time, mostly mid-late 2018, where 79 | // we ran both Safari 11.1 and 12.1, and the results are massively different. 80 | // We should fetch multiple runs for each browser and have upgrade logic. 81 | async function fetchAlignedRunsFromServer(products, from, to, experimental) { 82 | const label = experimental ? 
'experimental' : 'stable'; 83 | let params = `label=master&label=${label}`; 84 | for (const product of products) { 85 | params += `&product=${product}`; 86 | } 87 | const runsUri = `${RUNS_API}?aligned=true&max-count=1&${params}`; 88 | 89 | console.log(`Fetching aligned runs from ${from.format('YYYY-MM-DD')} ` + 90 | `to ${to.format('YYYY-MM-DD')}`); 91 | 92 | let cachedCount = 0; 93 | const before = moment(); 94 | const noCacheAfter = moment().subtract('3', 'days'); 95 | const alignedRuns = new Map(); 96 | 97 | while (from < to) { 98 | const yesterday = moment(from).subtract(1, 'days'); 99 | const today = moment(from); 100 | from.add(1, 'days'); 101 | const tomorrow = moment(from); 102 | 103 | const formattedFrom = yesterday.format('YYYY-MM-DD'); 104 | const formattedTo = tomorrow.format('YYYY-MM-DD'); 105 | 106 | // We advance the date (if necessary) before doing anything more, so that 107 | // code later in the loop body can just 'continue' without checking. 108 | from = advanceDateToSkipBadDataIfNecessary(from, experimental); 109 | 110 | // Attempt to read the runs from the cache. 111 | // TODO: Consider https://github.com/tidoust/fetch-filecache-for-crawling 112 | let runs; 113 | const cacheFilename = 114 | [label, ...products, 'runs', formattedFrom, formattedTo].join('-') + 115 | '.json'; 116 | const cacheFile = path.join(__dirname, '..', 'cache', cacheFilename); 117 | try { 118 | runs = JSON.parse(await fs.promises.readFile(cacheFile)); 119 | if (runs.length) { 120 | cachedCount++; 121 | } 122 | } catch (e) { 123 | // No cache hit; load from the server instead. 124 | const url = `${runsUri}&from=${formattedFrom}&to=${formattedTo}`; 125 | const response = await fetch(url); 126 | runs = await response.json(); 127 | 128 | if (from.isSameOrBefore(noCacheAfter)) { 129 | // Avoid caching for the last few days, as new runs may still appear 130 | // here; otherwise, cache unconditionally, even if we do not have an 131 | // aligned set of runs. 
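// The cache key combines the label, the product list and the date window, i.e. a
// file named <label>-<product>-...-runs-<from>-<to>.json under cache/.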
132 | await fs.promises.writeFile(cacheFile, JSON.stringify(runs)); 133 | } 134 | } 135 | 136 | if (!runs.length) { 137 | continue; 138 | } 139 | 140 | if (runs.length !== products.length) { 141 | throw new Error( 142 | `Fetched ${runs.length} runs, expected ${products.length}`); 143 | } 144 | 145 | if ( 146 | !runs.some(run => 147 | moment(run.time_start, moment.ISO_8601).isSame(today, 'day'), 148 | ) 149 | ) { 150 | continue; 151 | } 152 | 153 | alignedRuns.set(today.format('YYYY-MM-DD'), runs); 154 | } 155 | const after = moment(); 156 | console.log(`Fetched ${alignedRuns.size} sets of runs in ` + 157 | `${after - before} ms (${cachedCount} cached)`); 158 | 159 | return alignedRuns; 160 | } 161 | 162 | module.exports = {get, getAll, getIterator, fetchAlignedRunsFromServer}; 163 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "results-analysis", 3 | "description": "web-platform-tests results analysis", 4 | "version": "0.0.1", 5 | "license": "BSD-3-Clause", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/web-platform-tests/results-analysis.git" 9 | }, 10 | "scripts": { 11 | "install": "git clone --bare https://github.com/web-platform-tests/results-analysis-cache.git && git --git-dir=results-analysis-cache.git remote set-url --push origin git@github.com:web-platform-tests/results-analysis-cache.git || git --git-dir=results-analysis-cache.git fetch --tags", 12 | "lint": "eslint .", 13 | "test": "npm run lint && npm run test-unit", 14 | "test-unit": "mocha -u bdd ./test/*.js" 15 | }, 16 | "dependencies": { 17 | "flags": "0.1.3", 18 | "moment": "2.30.1", 19 | "node-fetch": "2.7.0", 20 | "nodegit": "0.28.0-alpha.28" 21 | }, 22 | "devDependencies": { 23 | "chai": "4.5.0", 24 | "eslint": "8.57.1", 25 | "eslint-config-google": "0.14.0", 26 | "mocha": "11.2.2" 27 | }, 28 | "engines": { 29 | "node": "^18" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /test/bad-ranges.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const assert = require('chai').assert; 4 | const moment = require('moment'); 5 | 6 | const {advanceDateToSkipBadDataIfNecessary} = require('../bad-ranges'); 7 | 8 | describe('bad-ranges.js', () => { 9 | it('should advance date at beginning of bad range', () => { 10 | const date = moment('2019-02-06'); 11 | const adjusted = advanceDateToSkipBadDataIfNecessary(date); 12 | assert.equal(adjusted.format('YYYY-MM-DD'), '2019-03-09'); 13 | }); 14 | 15 | it('should advance date in middle of bad range', () => { 16 | const date = moment('2019-02-28'); 17 | const adjusted = advanceDateToSkipBadDataIfNecessary(date); 18 | assert.equal(adjusted.format('YYYY-MM-DD'), '2019-03-09'); 19 | }); 20 | 21 | it('should NOT advance date at end of bad range', () => { 22 | const date = moment('2019-03-09'); 23 | const adjusted = advanceDateToSkipBadDataIfNecessary(date); 24 | assert.equal(date, adjusted); 25 | }); 26 | 27 | it('should NOT advance date outside of a bad range', () => { 28 | const date = moment('2022-01-01'); 29 | const adjusted = advanceDateToSkipBadDataIfNecessary(date); 30 | assert.equal(date, adjusted); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /test/browser-specific.js: -------------------------------------------------------------------------------- 1 
| /* eslint-disable max-len */ 2 | 3 | 'use strict'; 4 | 5 | const assert = require('chai').assert; 6 | const browserSpecific = require('../lib/browser-specific'); 7 | 8 | function createEmptyTree() { 9 | return { 10 | trees: {}, 11 | tests: {}, 12 | }; 13 | } 14 | 15 | let uniqueId = 0; 16 | 17 | class TreeBuilder { 18 | constructor() { 19 | this.root = createEmptyTree(); 20 | } 21 | 22 | build() { 23 | // Time to add all the unique ids. 24 | function addUniqueIds(node) { 25 | node.id = ++uniqueId; 26 | 27 | for (const test of Object.values(node.tests)) { 28 | test.id = ++uniqueId; 29 | } 30 | for (const tree of Object.values(node.trees)) { 31 | addUniqueIds(tree); 32 | } 33 | } 34 | 35 | addUniqueIds(this.root); 36 | return this.root; 37 | } 38 | 39 | // Add a test with a given status to the tree. The path parameter is 40 | // interpreted as a directory path and subtrees are created as necessary. 41 | addTest(path, status) { 42 | let currentNode = this.root; 43 | const testParts = path.split('/'); 44 | for (let i = 0; i < testParts.length - 1; i++) { 45 | const directoryName = testParts[i]; 46 | if (!(directoryName in currentNode.trees)) { 47 | currentNode.trees[directoryName] = createEmptyTree(); 48 | } 49 | currentNode = currentNode.trees[directoryName]; 50 | } 51 | 52 | const testName = testParts[testParts.length - 1]; 53 | assert.doesNotHaveAnyKeys( 54 | currentNode.tests, testName, `tree already has a test at ${path}`); 55 | currentNode.tests[testName] = {status}; 56 | 57 | return this; 58 | } 59 | 60 | // Add a subtest with a given status to the tree. The test object must already 61 | // have been created; a subtest array will be created if necessary. 62 | addSubtest(testPath, subtest, status) { 63 | let currentNode = this.root; 64 | const testParts = testPath.split('/'); 65 | for (let i = 0; i < testParts.length - 1; i++) { 66 | currentNode = currentNode.trees[testParts[i]]; 67 | } 68 | 69 | const testName = testParts[testParts.length - 1]; 70 | const test = currentNode.tests[testName]; 71 | if (test.subtests === undefined) { 72 | test.subtests = []; 73 | } 74 | test.subtests.push({name: subtest, status}); 75 | 76 | return this; 77 | } 78 | } 79 | 80 | describe('browser-specific.js', () => { 81 | describe('Browser Validation', () => { 82 | it('should not throw if the browser list is correct', () => { 83 | const runs = [ 84 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 85 | {browser_name: 'firefox', tree: new TreeBuilder().build()}, 86 | ]; 87 | const expectedBrowsers = new Set(['chrome', 'firefox']); 88 | assert.doesNotThrow(() => { 89 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 90 | }); 91 | }); 92 | 93 | it('should throw if an expected browser is missing', () => { 94 | let runs = []; 95 | let expectedBrowsers = new Set(['chrome', 'firefox']); 96 | assert.throws(() => { 97 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 98 | }); 99 | 100 | runs = [ 101 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 102 | {browser_name: 'firefox', tree: new TreeBuilder().build()}, 103 | ]; 104 | expectedBrowsers = new Set(['chrome', 'firefox', 'safari']); 105 | assert.throws(() => { 106 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 107 | }); 108 | }); 109 | 110 | it('should throw if an unexpected browser is present', () => { 111 | let runs = [ 112 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 113 | {browser_name: 'firefox', tree: new TreeBuilder().build()}, 114 | ]; 115 | let 
expectedBrowsers = new Set; 116 | assert.throws(() => { 117 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 118 | }); 119 | 120 | runs = [ 121 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 122 | {browser_name: 'firefox', tree: new TreeBuilder().build()}, 123 | {browser_name: 'safari', tree: new TreeBuilder().build()}, 124 | ]; 125 | expectedBrowsers = new Set(['chrome', 'firefox']); 126 | assert.throws(() => { 127 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 128 | }); 129 | }); 130 | 131 | it('should throw if there are duplicate browsers', () => { 132 | const runs = [ 133 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 134 | {browser_name: 'firefox', tree: new TreeBuilder().build()}, 135 | {browser_name: 'chrome', tree: new TreeBuilder().build()}, 136 | ]; 137 | const expectedBrowsers = new Set(['chrome', 'firefox']); 138 | assert.throws(() => { 139 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 140 | }); 141 | }); 142 | }); 143 | 144 | describe('Scoring Runs', () => { 145 | it('should score top-level tests correctly', () => { 146 | const expectedBrowsers = new Set(['chrome', 'firefox']); 147 | 148 | // A basic case; test passes in Chrome but fails in Firefox. 149 | let chromeTree = new TreeBuilder().addTest('TestA', 'PASS').build(); 150 | let firefoxTree = new TreeBuilder().addTest('TestA', 'FAIL').build(); 151 | let runs = [ 152 | {browser_name: 'chrome', tree: chromeTree}, 153 | {browser_name: 'firefox', tree: firefoxTree}, 154 | ]; 155 | 156 | let scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 157 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 1]])); 158 | 159 | // The following are all treated as failure: FAIL, ERROR, TIMEOUT, CRASH. 160 | chromeTree = new TreeBuilder() 161 | .addTest('TestA', 'ERROR') 162 | .addTest('TestB', 'TIMEOUT') 163 | .addTest('TestC', 'CRASH') 164 | .build(); 165 | firefoxTree = new TreeBuilder() 166 | .addTest('TestA', 'PASS') 167 | .addTest('TestB', 'PASS') 168 | .addTest('TestC', 'PASS') 169 | .build(); 170 | runs = [ 171 | {browser_name: 'chrome', tree: chromeTree}, 172 | {browser_name: 'firefox', tree: firefoxTree}, 173 | ]; 174 | 175 | scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 176 | assert.deepEqual(scores, new Map([['chrome', 3], ['firefox', 0]])); 177 | 178 | // There are also 'neutral' statuses (PRECONDITION_FAILED, SKIP), which 179 | // mean a test can never be a browser-specific fail. Make sure to test 180 | // that a neutral status is not treated as either a PASS or FAIL. 
181 | chromeTree = new TreeBuilder() 182 | .addTest('TestA', 'PASS') 183 | .addTest('TestB', 'FAIL') 184 | .addTest('TestC', 'PASS') 185 | .addTest('TestD', 'FAIL') 186 | .build(); 187 | firefoxTree = new TreeBuilder() 188 | .addTest('TestA', 'PRECONDITION_FAILED') 189 | .addTest('TestB', 'PRECONDITION_FAILED') 190 | .addTest('TestC', 'SKIP') 191 | .addTest('TestD', 'SKIP') 192 | .build(); 193 | runs = [ 194 | {browser_name: 'chrome', tree: chromeTree}, 195 | {browser_name: 'firefox', tree: firefoxTree}, 196 | ]; 197 | 198 | scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 199 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 0]])); 200 | }); 201 | 202 | it('should throw for an unknown top-level test status', () => { 203 | const expectedBrowsers = new Set(['chrome', 'firefox']); 204 | 205 | const chromeTree = new TreeBuilder().addTest('TestA', 'FOO').build(); 206 | const firefoxTree = new TreeBuilder().addTest('TestA', 'PASS').build(); 207 | const runs = [ 208 | {browser_name: 'chrome', tree: chromeTree}, 209 | {browser_name: 'firefox', tree: firefoxTree}, 210 | ]; 211 | 212 | assert.throws(() => { 213 | browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 214 | }); 215 | }); 216 | 217 | it('should traverse subtrees correctly', () => { 218 | const expectedBrowsers = new Set(['chrome', 'firefox']); 219 | 220 | const chromeTree = new TreeBuilder().addTest('a/b/TestA', 'FAIL').build(); 221 | const firefoxTree = new TreeBuilder().addTest('a/b/TestA', 'PASS').build(); 222 | const runs = [ 223 | {browser_name: 'chrome', tree: chromeTree}, 224 | {browser_name: 'firefox', tree: firefoxTree}, 225 | ]; 226 | 227 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 228 | assert.deepEqual(scores, new Map([['chrome', 1], ['firefox', 0]])); 229 | }); 230 | 231 | it('should normalize subtests correctly', () => { 232 | const expectedBrowsers = new Set(['chrome', 'firefox']); 233 | 234 | let chromeTree = new TreeBuilder() 235 | .addTest('TestA', 'OK') 236 | .addSubtest('TestA', 'test 1', 'PASS') 237 | .addSubtest('TestA', 'test 2', 'PASS') 238 | .addSubtest('TestA', 'test 3', 'FAIL') 239 | .addSubtest('TestA', 'test 4', 'PASS') 240 | .build(); 241 | let firefoxTree = new TreeBuilder() 242 | .addTest('TestA', 'OK') 243 | .addSubtest('TestA', 'test 1', 'FAIL') 244 | .addSubtest('TestA', 'test 2', 'FAIL') 245 | .addSubtest('TestA', 'test 3', 'PASS') 246 | .addSubtest('TestA', 'test 4', 'PASS') 247 | .build(); 248 | let runs = [ 249 | {browser_name: 'chrome', tree: chromeTree}, 250 | {browser_name: 'firefox', tree: firefoxTree}, 251 | ]; 252 | 253 | // 1/4 subtests are Chrome-only failures, and 2/4 subtests are 254 | // Firefox-only failures. 255 | let scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 256 | assert.deepEqual(scores, new Map([['chrome', 0.25], ['firefox', 0.5]])); 257 | 258 | // TIMEOUT and ERROR are also considered failure modes. 
259 | chromeTree = new TreeBuilder() 260 | .addTest('TestA', 'OK') 261 | .addSubtest('TestA', 'test 1', 'TIMEOUT') 262 | .addSubtest('TestA', 'test 2', 'ERROR') 263 | .build(); 264 | firefoxTree = new TreeBuilder() 265 | .addTest('TestA', 'OK') 266 | .addSubtest('TestA', 'test 1', 'PASS') 267 | .addSubtest('TestA', 'test 2', 'PASS') 268 | .build(); 269 | runs = [ 270 | {browser_name: 'chrome', tree: chromeTree}, 271 | {browser_name: 'firefox', tree: firefoxTree}, 272 | ]; 273 | scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 274 | assert.deepEqual(scores, new Map([['chrome', 1], ['firefox', 0]])); 275 | 276 | // There are also 'neutral' statuses (PRECONDITION_FAILED, SKIP), which 277 | // mean a subtest can never be a browser-specific fail. Make sure to test 278 | // that a neutral status is not treated as either a PASS or FAIL. 279 | chromeTree = new TreeBuilder() 280 | .addTest('TestA', 'OK') 281 | .addSubtest('TestA', 'test 1', 'PRECONDITION_FAILED') 282 | .addSubtest('TestA', 'test 2', 'PRECONDITION_FAILED') 283 | .addSubtest('TestA', 'test 3', 'SKIP') 284 | .addSubtest('TestA', 'test 4', 'SKIP') 285 | .build(); 286 | firefoxTree = new TreeBuilder() 287 | .addTest('TestA', 'OK') 288 | .addSubtest('TestA', 'test 1', 'PASS') 289 | .addSubtest('TestA', 'test 2', 'PASS') 290 | .addSubtest('TestA', 'test 3', 'PASS') 291 | .addSubtest('TestA', 'test 4', 'PASS') 292 | .build(); 293 | runs = [ 294 | {browser_name: 'chrome', tree: chromeTree}, 295 | {browser_name: 'firefox', tree: firefoxTree}, 296 | ]; 297 | scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 298 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 0]])); 299 | }); 300 | 301 | it('should treat duplicate subtests as one', () => { 302 | const expectedBrowsers = new Set(['chrome', 'safari']); 303 | 304 | // This can happen due to, e.g., https://github.com/web-platform-tests/wpt/issues/12632 305 | const chromeTree = new TreeBuilder() 306 | .addTest('TestA', 'OK') 307 | .addSubtest('TestA', 'test 1', 'PASS') 308 | .addSubtest('TestA', 'test 2', 'FAIL') 309 | .build(); 310 | 311 | const safariTree = new TreeBuilder() 312 | .addTest('TestA', 'OK') 313 | .addSubtest('TestA', 'test 1', 'FAIL') 314 | .addSubtest('TestA', 'test 1', 'ERROR') 315 | .addSubtest('TestA', 'test 2', 'FAIL') 316 | .build(); 317 | 318 | const runs = [ 319 | {browser_name: 'chrome', tree: chromeTree}, 320 | {browser_name: 'safari', tree: safariTree}, 321 | ]; 322 | 323 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 324 | assert.deepEqual(scores, new Map([['chrome', 0.0], ['safari', 0.5]])); 325 | }); 326 | 327 | it('should ignore tests that arent in all browsers', () => { 328 | const expectedBrowsers = new Set(['chrome', 'firefox']); 329 | 330 | // If a test doesn't exist in all browsers, it never counts for 331 | // browser-specific failures. 
332 | const chromeTree = new TreeBuilder() 333 | .addTest('TestA', 'FAIL') 334 | .addTest('TestB', 'PASS') 335 | .build(); 336 | const firefoxTree = new TreeBuilder() 337 | .addTest('TestB', 'PASS') 338 | .addTest('TestC', 'PASS') 339 | .build(); 340 | const runs = [ 341 | {browser_name: 'chrome', tree: chromeTree}, 342 | {browser_name: 'firefox', tree: firefoxTree}, 343 | ]; 344 | 345 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 346 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 0]])); 347 | }); 348 | 349 | it('should ignore tests that arent in all browsers, with confusing IDs', () => { 350 | const expectedBrowsers = new Set(['chrome', 'firefox']); 351 | 352 | // In the real code, the IDs are from git blob IDs, and thus all identical 353 | // statuses have the same ID. 354 | const PASS = { 355 | 'status': 'PASS', 356 | 'id': ++uniqueId, 357 | }; 358 | 359 | const FAIL = { 360 | 'status': 'FAIL', 361 | 'id': ++uniqueId, 362 | }; 363 | 364 | const chromeTree = { 365 | 'trees': {}, 366 | 'tests': { 367 | 'block-end-aligned-abspos-with-overflow.html': PASS, 368 | }, 369 | 'id': ++uniqueId, 370 | }; 371 | 372 | const firefoxTree = { 373 | 'trees': {}, 374 | 'tests': { 375 | // note that block-002-wm-vrl-print.html and 376 | // block-end-aligned-abspos-with-overflow.html above have the same PASS object 377 | // for their results 378 | 'block-002-wm-vrl-print.html': PASS, 379 | 'block-end-aligned-abspos-with-overflow.html': FAIL, 380 | }, 381 | 'id': ++uniqueId, 382 | }; 383 | 384 | const runs = [ 385 | {browser_name: 'chrome', tree: chromeTree}, 386 | {browser_name: 'firefox', tree: firefoxTree}, 387 | ]; 388 | 389 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 390 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 1]])); 391 | }); 392 | 393 | it('should ignore subtrees that arent in all browsers', () => { 394 | const expectedBrowsers = new Set(['chrome', 'firefox']); 395 | 396 | // If a subtree doesn't exist in all browsers, it is just ignored. 397 | const chromeTree = new TreeBuilder() 398 | .addTest('a/b/c/TestA', 'FAIL') 399 | .addTest('d/e/f/TestB', 'PASS') 400 | .build(); 401 | const firefoxTree = new TreeBuilder() 402 | .addTest('d/e/f/TestB', 'PASS') 403 | .addTest('g/h/i/TestA', 'PASS') 404 | .build(); 405 | const runs = [ 406 | {browser_name: 'chrome', tree: chromeTree}, 407 | {browser_name: 'firefox', tree: firefoxTree}, 408 | ]; 409 | 410 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 411 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 0]])); 412 | }); 413 | 414 | it('should handle subtests that arent in all browsers', () => { 415 | const expectedBrowsers = new Set(['chrome', 'firefox']); 416 | 417 | // We take the union of results when looking at subtests. This means that 418 | // a missing subtest can be a browser-specific failure, if all other 419 | // browsers have a passing result for it. Even if they don't, it still 420 | // counts for the denominator. 
421 | const chromeTree = new TreeBuilder() 422 | .addTest('TestA', 'OK') 423 | .addSubtest('TestA', 'test 1', 'PASS') 424 | .addSubtest('TestA', 'test 2', 'FAIL') 425 | .addSubtest('TestA', 'test 3', 'PASS') 426 | .build(); 427 | const firefoxTree = new TreeBuilder() 428 | .addTest('TestA', 'OK') 429 | .addSubtest('TestA', 'test 2', 'PASS') 430 | .addSubtest('TestA', 'test 3', 'PASS') 431 | .addSubtest('TestA', 'test 4', 'PASS') 432 | .addSubtest('TestA', 'test 5', 'PASS') 433 | .build(); 434 | const runs = [ 435 | {browser_name: 'chrome', tree: chromeTree}, 436 | {browser_name: 'firefox', tree: firefoxTree}, 437 | ]; 438 | 439 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 440 | assert.deepEqual(scores, new Map([['chrome', 0.6], ['firefox', 0.2]])); 441 | }); 442 | 443 | it('should handle the case where one browser has no subtests for a test', () => { 444 | const expectedBrowsers = new Set(['chrome', 'firefox']); 445 | 446 | // In this case, Chrome ran the test and had a harness error, so has no 447 | // subtests. Firefox did find subtests. When one or more browsers have 448 | // subtests for a given test, but some browsers don't, we ignore the test 449 | // entirely. 450 | const chromeTree = new TreeBuilder().addTest('TestA', 'ERROR').build(); 451 | const firefoxTree = new TreeBuilder() 452 | .addTest('TestA', 'OK') 453 | .addSubtest('TestA', 'test 1', 'PASS') 454 | .addSubtest('TestA', 'test 2', 'FAIL') 455 | .addSubtest('TestA', 'test 3', 'PASS') 456 | .build(); 457 | const runs = [ 458 | {browser_name: 'chrome', tree: chromeTree}, 459 | {browser_name: 'firefox', tree: firefoxTree}, 460 | ]; 461 | 462 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 463 | assert.deepEqual(scores, new Map([['chrome', 0], ['firefox', 0]])); 464 | }); 465 | 466 | it('should handle tests differing by case', () => { 467 | const expectedBrowsers = new Set(['chrome', 'firefox', 'safari']); 468 | 469 | const chromeTree = new TreeBuilder() 470 | .addTest('TEST (upper)', 'FAIL') 471 | .addTest('test (lower)', 'PASS') 472 | .build(); 473 | 474 | const firefoxTree = new TreeBuilder() 475 | .addTest('TEST (upper)', 'PASS') 476 | .build(); 477 | 478 | const safariTree = new TreeBuilder() 479 | .addTest('TEST (upper)', 'PASS') 480 | .build(); 481 | 482 | const runs = [ 483 | {browser_name: 'chrome', tree: chromeTree}, 484 | {browser_name: 'firefox', tree: firefoxTree}, 485 | {browser_name: 'safari', tree: safariTree}, 486 | ]; 487 | 488 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 489 | assert.deepEqual(scores, new Map([['chrome', 1], ['firefox', 0], ['safari', 0]])); 490 | }); 491 | 492 | it('should handle subtests differing by case', () => { 493 | const expectedBrowsers = new Set(['chrome', 'firefox', 'safari']); 494 | 495 | const chromeTree = new TreeBuilder() 496 | .addTest('TestA', 'OK') 497 | .addSubtest('TestA', 'TEST (upper)', 'PASS') 498 | .build(); 499 | 500 | const firefoxTree = new TreeBuilder() 501 | .addTest('TestA', 'OK') 502 | .addSubtest('TestA', 'test (lower)', 'PASS') 503 | .build(); 504 | 505 | const safariTree = new TreeBuilder() 506 | .addTest('TestA', 'OK') 507 | .addSubtest('TestA', 'TEST (upper)', 'PASS') 508 | .addSubtest('TestA', 'test (lower)', 'PASS') 509 | .build(); 510 | 511 | const runs = [ 512 | {browser_name: 'chrome', tree: chromeTree}, 513 | {browser_name: 'firefox', tree: firefoxTree}, 514 | {browser_name: 'safari', tree: safariTree}, 515 | ]; 516 | 517 | const 
scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 518 | assert.deepEqual(scores, new Map([['chrome', 0.5], ['firefox', 0.5], ['safari', 0.0]])); 519 | }); 520 | 521 | it('should handle subtests differing by case 2', () => { 522 | const expectedBrowsers = new Set(['chrome', 'firefox', 'safari']); 523 | 524 | const chromeTree = new TreeBuilder() 525 | .addTest('TestA', 'OK') 526 | .addSubtest('TestA', 'TEST (upper)', 'PASS') 527 | .build(); 528 | 529 | const firefoxTree = new TreeBuilder() 530 | .addTest('TestA', 'OK') 531 | .addSubtest('TestA', 'test (lower)', 'PASS') 532 | .build(); 533 | 534 | const safariTree = new TreeBuilder() 535 | .addTest('TestA', 'OK') 536 | .addSubtest('TestA', 'test (lower)', 'PASS') 537 | .addSubtest('TestA', 'TEST (upper)', 'PASS') 538 | .build(); 539 | 540 | const runs = [ 541 | {browser_name: 'chrome', tree: chromeTree}, 542 | {browser_name: 'firefox', tree: firefoxTree}, 543 | {browser_name: 'safari', tree: safariTree}, 544 | ]; 545 | 546 | const scores = browserSpecific.scoreBrowserSpecificFailures(runs, expectedBrowsers); 547 | assert.deepEqual(scores, new Map([['chrome', 0.5], ['firefox', 0.5], ['safari', 0.0]])); 548 | }); 549 | }); 550 | }); 551 | --------------------------------------------------------------------------------