├── .github
└── FUNDING.yml
├── .gitignore
├── README.md
├── opportunities
├── 01_initialization.sql
├── 02_aggregation.sql
├── 03_impact.sql
├── 04_opportunities.sql
├── README.md
└── lighthouse_cwv.js
└── sql
├── all.sql
└── monthly.sql
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: httparchive
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **This repository is deprecated.** Please see [HTTPArchive/dataform](https://github.com/HTTPArchive/dataform) for the next version of CWV Tech Report data pipeline.
2 |
3 | # Core Web Vitals Technology Report
4 |
5 |
6 |
7 | Public dashboard: https://cwvtech.report/
8 |
9 | Methodology: https://discuss.httparchive.org/t/new-dashboard-the-core-web-vitals-technology-report/2178
10 |
--------------------------------------------------------------------------------
/opportunities/01_initialization.sql:
--------------------------------------------------------------------------------
1 | # Note: you'll need to manually configure this query in the BigQuery UI to overwrite the contents of the `httparchive.scratchspace.lh_cwv` table.
2 |
3 | CREATE TEMP FUNCTION IS_INTERESTING(app STRING) RETURNS BOOL AS (
4 | app IN (
5 | # JavaScript frameworks and libraries
6 | 'React', 'Next.js', 'Angular', 'Vue.js', 'Nuxt.js', 'Svelte', 'SvelteKit', 'Preact',
7 | # CMSs
8 | 'WordPress', 'Drupal', 'Joomla', 'Wix', 'Squarespace', 'Duda', 'Shopify', 'Elementor')
9 | );
10 |
11 | # This content is generated from lighthouse_cwv.js.
12 | CREATE TEMP FUNCTION GET_AUDITS(lhr STRING) RETURNS ARRAY> AS (
13 | [
14 | STRUCT('FID' AS metric, 'third-party-summary' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-summary".score') AS FLOAT64) >= 0.9 AS passing),
15 | STRUCT('FID' AS metric, 'third-party-facades' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-facades".score') AS FLOAT64) >= 0.9 AS passing),
16 | STRUCT('FID' AS metric, 'bootup-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."bootup-time".score') AS FLOAT64) >= 0.9 AS passing),
17 | STRUCT('FID' AS metric, 'mainthread-work-breakdown' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."mainthread-work-breakdown".score') AS FLOAT64) >= 0.9 AS passing),
18 | STRUCT('FID' AS metric, 'dom-size' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."dom-size".score') AS FLOAT64) >= 0.9 AS passing),
19 | STRUCT('FID' AS metric, 'duplicated-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."duplicated-javascript".score') AS FLOAT64) >= 0.9 AS passing),
20 | STRUCT('FID' AS metric, 'legacy-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."legacy-javascript".score') AS FLOAT64) >= 0.9 AS passing),
21 | STRUCT('FID' AS metric, 'viewport' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."viewport".score') AS FLOAT64) >= 0.9 AS passing),
22 | STRUCT('LCP' AS metric, 'server-response-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."server-response-time".score') AS FLOAT64) >= 0.9 AS passing),
23 | STRUCT('LCP' AS metric, 'render-blocking-resources' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."render-blocking-resources".score') AS FLOAT64) >= 0.9 AS passing),
24 | STRUCT('LCP' AS metric, 'redirects' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."redirects".score') AS FLOAT64) >= 0.9 AS passing),
25 | STRUCT('LCP' AS metric, 'uses-text-compression' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-text-compression".score') AS FLOAT64) >= 0.9 AS passing),
26 | STRUCT('LCP' AS metric, 'uses-rel-preconnect' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preconnect".score') AS FLOAT64) >= 0.9 AS passing),
27 | STRUCT('LCP' AS metric, 'uses-rel-preload' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preload".score') AS FLOAT64) >= 0.9 AS passing),
28 | STRUCT('LCP' AS metric, 'font-display' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."font-display".score') AS FLOAT64) >= 0.9 AS passing),
29 | STRUCT('LCP' AS metric, 'unminified-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-javascript".score') AS FLOAT64) >= 0.9 AS passing),
30 | STRUCT('LCP' AS metric, 'unminified-css' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-css".score') AS FLOAT64) >= 0.9 AS passing),
31 | STRUCT('LCP' AS metric, 'unused-css-rules' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-css-rules".score') AS FLOAT64) >= 0.9 AS passing),
32 | STRUCT('LCP' AS metric, 'preload-lcp-image' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."preload-lcp-image".score') AS FLOAT64) >= 0.9 AS passing),
33 | STRUCT('LCP' AS metric, 'unused-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-javascript".score') AS FLOAT64) >= 0.9 AS passing),
34 | STRUCT('LCP' AS metric, 'efficient-animated-content' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."efficient-animated-content".score') AS FLOAT64) >= 0.9 AS passing),
35 | STRUCT('LCP' AS metric, 'total-byte-weight' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."total-byte-weight".score') AS FLOAT64) >= 0.9 AS passing),
36 | STRUCT('LCP' AS metric, 'uses-optimized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-optimized-images".score') AS FLOAT64) >= 0.9 AS passing),
37 | STRUCT('LCP' AS metric, 'uses-responsive-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-responsive-images".score') AS FLOAT64) >= 0.9 AS passing),
38 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing),
39 | STRUCT('CLS' AS metric, 'unsized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unsized-images".score') AS FLOAT64) >= 0.9 AS passing)
40 | ]
41 | );
42 |
43 | CREATE TEMP FUNCTION GET_PCT_GOOD_LCP(har STRING) RETURNS NUMERIC AS (
44 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.largest_contentful_paint.percentiles.p75') AS NUMERIC)
45 | );
46 | CREATE TEMP FUNCTION GET_PCT_GOOD_CLS(har STRING) RETURNS NUMERIC AS (
47 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.cumulative_layout_shift.percentiles.p75') AS NUMERIC)
48 | );
49 | CREATE TEMP FUNCTION GET_PCT_GOOD_FID(har STRING) RETURNS NUMERIC AS (
50 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.first_input_delay.percentiles.p75') AS NUMERIC)
51 | );
52 |
53 | CREATE TEMP FUNCTION GET_LAB_LCP(har STRING) RETURNS NUMERIC AS (
54 | CAST(JSON_VALUE(har, '$."_chromeUserTiming.LargestContentfulPaint"') AS NUMERIC)
55 | );
56 | CREATE TEMP FUNCTION GET_LAB_CLS(har STRING) RETURNS NUMERIC AS (
57 | CAST(JSON_VALUE(har, '$."_chromeUserTiming.CumulativeLayoutShift"') AS NUMERIC)
58 | );
59 | CREATE TEMP FUNCTION GET_LAB_TBT(har STRING) RETURNS NUMERIC AS (
60 | CAST(JSON_VALUE(har, '$."_TotalBlockingTime"') AS NUMERIC)
61 | );
62 |
63 | CREATE TEMP FUNCTION GET_CWV(har STRING) RETURNS ARRAY> AS (
64 | [
65 | STRUCT('field' AS env, 'LCP' AS metric, GET_PCT_GOOD_LCP(har) AS value),
66 | STRUCT('field' AS env, 'FID' AS metric, GET_PCT_GOOD_FID(har) AS value),
67 | STRUCT('field' AS env, 'CLS' AS metric, GET_PCT_GOOD_CLS(har) AS value),
68 | STRUCT('lab' AS env, 'LCP' AS metric, GET_LAB_LCP(har) AS value),
69 | STRUCT('lab' AS env, 'FID' AS metric, GET_LAB_TBT(har) AS value),
70 | STRUCT('lab' AS env, 'CLS' AS metric, GET_LAB_CLS(har) AS value)
71 | ]
72 | );
73 |
74 | CREATE TEMP FUNCTION CALC_PCT_DIFF(before NUMERIC, after NUMERIC) RETURNS NUMERIC AS (
75 | SAFE_DIVIDE(after - before, before)
76 | );
77 |
78 | WITH technologies AS (
79 | SELECT DISTINCT
80 | url,
81 | app AS technology
82 | FROM
83 | `httparchive.technologies.2022_05_01_mobile`
84 | WHERE
85 | IS_INTERESTING(app)
86 | ), audits AS (
87 | SELECT
88 | url,
89 | metric,
90 | audit,
91 | after.passing AS improved
92 | FROM (
93 | SELECT
94 | url,
95 | audit.*
96 | FROM
97 | `httparchive.lighthouse.2021_04_01_mobile`,
98 | UNNEST(GET_AUDITS(report)) AS audit
99 | UNION ALL
100 | SELECT
101 | url,
102 | audit.*
103 | FROM
104 | `httparchive.lighthouse.2021_06_01_mobile`,
105 | UNNEST([
106 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(report, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing)
107 | ]) AS audit) AS before
108 | JOIN (
109 | SELECT
110 | url,
111 | audit.*
112 | FROM
113 | `httparchive.lighthouse.2022_05_01_mobile`,
114 | UNNEST(GET_AUDITS(report)) AS audit) AS after
115 | USING
116 | (url, metric, audit)
117 | WHERE
118 | before.passing != after.passing AND
119 | before.passing IS NOT NULL AND
120 | after.passing IS NOT NULL
121 | ), cwv AS (
122 | SELECT
123 | url,
124 | env,
125 | metric,
126 | before.value AS before,
127 | after.value AS after,
128 | CALC_PCT_DIFF(before.value, after.value) AS pct_diff
129 | FROM (
130 | SELECT
131 | url,
132 | env,
133 | metric,
134 | value
135 | FROM
136 | `httparchive.pages.2021_04_01_mobile`,
137 | UNNEST(GET_CWV(payload))) AS before
138 | JOIN (
139 | SELECT
140 | url,
141 | env,
142 | metric,
143 | value
144 | FROM
145 | `httparchive.pages.2022_05_01_mobile`,
146 | UNNEST(GET_CWV(payload))) AS after
147 | USING
148 | (url, env, metric)
149 | )
150 |
151 | SELECT
152 | *
153 | FROM
154 | technologies
155 | JOIN
156 | audits
157 | USING
158 | (url)
159 | JOIN
160 | cwv
161 | USING
162 | (url, metric)
163 | WHERE
164 | pct_diff IS NOT NULL
165 |
--------------------------------------------------------------------------------
/opportunities/02_aggregation.sql:
--------------------------------------------------------------------------------
1 | CREATE OR REPLACE TABLE `httparchive.scratchspace.lh_cwv_stats` AS
2 | SELECT
3 | percentile,
4 | technology,
5 | metric,
6 | audit,
7 | env,
8 | improved,
9 | COUNT(0) AS pages,
10 | APPROX_QUANTILES(pct_diff, 1000)[OFFSET(percentile * 10)] AS pct_diff
11 | FROM
12 | `httparchive.scratchspace.lh_cwv`,
13 | UNNEST(GENERATE_ARRAY(1, 100, 1)) AS percentile
14 | GROUP BY
15 | percentile,
16 | technology,
17 | metric,
18 | audit,
19 | env,
20 | improved
21 |
--------------------------------------------------------------------------------
/opportunities/03_impact.sql:
--------------------------------------------------------------------------------
1 | CREATE OR REPLACE TABLE `httparchive.scratchspace.lh_cwv_stats2` AS
2 | SELECT
3 | technology,
4 | metric,
5 | audit,
6 | env,
7 | passed.pages + failed.pages AS pages,
8 | passed.pct_diff - failed.pct_diff AS estimated_passing_impact_pct
9 | FROM (
10 | SELECT
11 | technology,
12 | metric,
13 | audit,
14 | env,
15 | pages,
16 | pct_diff
17 | FROM
18 | `httparchive.scratchspace.lh_cwv_stats`
19 | WHERE
20 | percentile = 50 AND
21 | NOT improved) AS failed
22 | JOIN (
23 | SELECT
24 | technology,
25 | metric,
26 | audit,
27 | env,
28 | pages,
29 | pct_diff
30 | FROM
31 | `httparchive.scratchspace.lh_cwv_stats`
32 | WHERE
33 | percentile = 50 AND
34 | improved) AS passed
35 | USING
36 | (technology, metric, audit, env)
37 | ORDER BY
38 | pages DESC
39 |
--------------------------------------------------------------------------------
/opportunities/04_opportunities.sql:
--------------------------------------------------------------------------------
1 | CREATE TEMP FUNCTION IS_INTERESTING(app STRING) RETURNS BOOL AS (
2 | app IN (
3 | # JavaScript frameworks and libraries
4 | 'React', 'Next.js', 'Angular', 'Vue.js', 'Nuxt.js', 'Svelte', 'SvelteKit', 'Preact',
5 | # CMSs
6 | 'WordPress', 'Drupal', 'Joomla', 'Wix', 'Squarespace', 'Duda', 'Shopify', 'Elementor')
7 | );
8 |
9 | CREATE TEMP FUNCTION GET_AUDITS(lhr STRING) RETURNS ARRAY> AS (
10 | [
11 | STRUCT('FID' AS metric, 'third-party-summary' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-summary".score') AS FLOAT64) >= 0.9 AS passing),
12 | STRUCT('FID' AS metric, 'third-party-facades' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-facades".score') AS FLOAT64) >= 0.9 AS passing),
13 | STRUCT('FID' AS metric, 'bootup-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."bootup-time".score') AS FLOAT64) >= 0.9 AS passing),
14 | STRUCT('FID' AS metric, 'mainthread-work-breakdown' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."mainthread-work-breakdown".score') AS FLOAT64) >= 0.9 AS passing),
15 | STRUCT('FID' AS metric, 'dom-size' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."dom-size".score') AS FLOAT64) >= 0.9 AS passing),
16 | STRUCT('FID' AS metric, 'duplicated-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."duplicated-javascript".score') AS FLOAT64) >= 0.9 AS passing),
17 | STRUCT('FID' AS metric, 'legacy-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."legacy-javascript".score') AS FLOAT64) >= 0.9 AS passing),
18 | STRUCT('FID' AS metric, 'viewport' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."viewport".score') AS FLOAT64) >= 0.9 AS passing),
19 | STRUCT('LCP' AS metric, 'server-response-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."server-response-time".score') AS FLOAT64) >= 0.9 AS passing),
20 | STRUCT('LCP' AS metric, 'render-blocking-resources' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."render-blocking-resources".score') AS FLOAT64) >= 0.9 AS passing),
21 | STRUCT('LCP' AS metric, 'redirects' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."redirects".score') AS FLOAT64) >= 0.9 AS passing),
22 | STRUCT('LCP' AS metric, 'uses-text-compression' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-text-compression".score') AS FLOAT64) >= 0.9 AS passing),
23 | STRUCT('LCP' AS metric, 'uses-rel-preconnect' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preconnect".score') AS FLOAT64) >= 0.9 AS passing),
24 | STRUCT('LCP' AS metric, 'uses-rel-preload' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preload".score') AS FLOAT64) >= 0.9 AS passing),
25 | STRUCT('LCP' AS metric, 'font-display' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."font-display".score') AS FLOAT64) >= 0.9 AS passing),
26 | STRUCT('LCP' AS metric, 'unminified-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-javascript".score') AS FLOAT64) >= 0.9 AS passing),
27 | STRUCT('LCP' AS metric, 'unminified-css' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-css".score') AS FLOAT64) >= 0.9 AS passing),
28 | STRUCT('LCP' AS metric, 'unused-css-rules' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-css-rules".score') AS FLOAT64) >= 0.9 AS passing),
29 | STRUCT('LCP' AS metric, 'preload-lcp-image' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."preload-lcp-image".score') AS FLOAT64) >= 0.9 AS passing),
30 | STRUCT('LCP' AS metric, 'unused-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-javascript".score') AS FLOAT64) >= 0.9 AS passing),
31 | STRUCT('LCP' AS metric, 'efficient-animated-content' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."efficient-animated-content".score') AS FLOAT64) >= 0.9 AS passing),
32 | STRUCT('LCP' AS metric, 'total-byte-weight' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."total-byte-weight".score') AS FLOAT64) >= 0.9 AS passing),
33 | STRUCT('LCP' AS metric, 'uses-optimized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-optimized-images".score') AS FLOAT64) >= 0.9 AS passing),
34 | STRUCT('LCP' AS metric, 'uses-responsive-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-responsive-images".score') AS FLOAT64) >= 0.9 AS passing),
35 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing),
36 | STRUCT('CLS' AS metric, 'unsized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unsized-images".score') AS FLOAT64) >= 0.9 AS passing)
37 | ]
38 | );
39 |
40 | WITH audits_with_impact AS (
41 | SELECT DISTINCT
42 | technology,
43 | audit
44 | FROM
45 | `httparchive.scratchspace.lh_cwv_stats`
46 | ), technologies AS (
47 | SELECT DISTINCT
48 | url,
49 | app AS technology,
50 | COUNT(DISTINCT url) OVER (PARTITION BY app) AS total
51 | FROM
52 | `httparchive.technologies.2022_05_01_mobile`
53 | WHERE
54 | IS_INTERESTING(app)
55 | ), ranks AS (
56 | SELECT
57 | url,
58 | CASE rank
59 | WHEN 1000 THEN 'head'
60 | WHEN 10000 THEN 'torso'
61 | WHEN 100000 THEN 'torso'
62 | WHEN 1000000 THEN 'torso'
63 | ELSE 'tail'
64 | END AS rank
65 | FROM
66 | `httparchive.summary_pages.2022_05_01_mobile`
67 | ), failing_audits AS (
68 | SELECT
69 | audit.audit,
70 | url
71 | FROM
72 | `httparchive.lighthouse.2022_05_01_mobile`,
73 | UNNEST(GET_AUDITS(report)) AS audit
74 | WHERE
75 | NOT passing
76 | ), totals_per_rank_technology AS (
77 | SELECT
78 | technology,
79 | rank,
80 | COUNT(DISTINCT url) AS total_per_rank
81 | FROM
82 | ranks
83 | JOIN
84 | technologies
85 | USING
86 | (url)
87 | GROUP BY
88 | technology,
89 | rank
90 | )
91 |
92 | SELECT
93 | technology,
94 | audit,
95 | rank,
96 | COUNT(0) AS failing,
97 | ANY_VALUE(total) AS total_technology,
98 | COUNT(0) / ANY_VALUE(total) AS pct_failing_technology,
99 | ANY_VALUE(total_per_rank) AS total_per_rank,
100 | COUNT(0) / ANY_VALUE(total_per_rank) AS pct_failing_per_technology_rank
101 | FROM
102 | audits_with_impact
103 | JOIN
104 | technologies
105 | USING
106 | (technology)
107 | JOIN
108 | failing_audits
109 | USING
110 | (audit, url)
111 | JOIN
112 | ranks
113 | USING
114 | (url)
115 | JOIN
116 | totals_per_rank_technology
117 | USING
118 | (technology, rank)
119 | GROUP BY
120 | technology,
121 | audit,
122 | rank
123 | ORDER BY
124 | total_technology DESC
125 |
--------------------------------------------------------------------------------
/opportunities/README.md:
--------------------------------------------------------------------------------
1 | # CWV opportunities
2 |
3 | The purpose of this project is to supplement the CWV Technology Report with a dashboard to assess the opportunities for each technology.
4 |
5 | - Stage 1 initializes the data for the particular technologies/audits of interest
6 | - lighthouse_cwv.js generates the corresponding SQL for the audit map
7 | - Stage 2 aggregates the results from Stage 1 into discrete percentiles
8 | - Stage 3 calculates the estimated impact from the medians in Stage 2
9 | - Stage 4 calculates the remaining opportunity based on audit failure rates
10 |
--------------------------------------------------------------------------------
/opportunities/lighthouse_cwv.js:
--------------------------------------------------------------------------------
1 | // This script assumes that there exists a `lhr` object containing a Lighthouse Report.
2 | // It generates the SQL needed to extract the before/after status for each audit, used by 01_initialization.sql.
3 |
4 | lhr.categories.performance.auditRefs.filter(ar => {
5 | return ['TBT', 'LCP', 'CLS'].includes(ar.acronym);
6 | }).map(ar => {
7 | return [ar.acronym, ar.relevantAudits];
8 | }).map(([metric, ras]) => {
9 | return [
10 | metric,
11 | ras.map(ra => {
12 | return lhr.audits[ra];
13 | }).filter(a => {
14 | return a.scoreDisplayMode != 'informative';
15 | })
16 | ];
17 | }).flatMap(([metric, audits]) => {
18 | return audits.map(a => {
19 | return `STRUCT('${metric == 'TBT' ? 'FID' : metric}' AS metric, '${a.id}' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."${a.id}".score') AS FLOAT64) >= 0.9 AS passing)`;
20 | });
21 | }).join(',\n');
22 |
--------------------------------------------------------------------------------
/sql/all.sql:
--------------------------------------------------------------------------------
1 | CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
2 | SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75
3 | );
4 |
5 | CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
6 | good + needs_improvement + poor > 0
7 | );
8 |
9 | CREATE TEMP FUNCTION GET_LIGHTHOUSE_CATEGORY_SCORES(categories STRING)
10 | RETURNS STRUCT
11 | LANGUAGE js AS '''
12 | try {
13 | const $ = JSON.parse(categories);
14 | return {
15 | accessibility: $.accessibility?.score,
16 | 'best-practices': $['best-practices']?.score,
17 | performance: $.performance?.score,
18 | pwa: $.pwa?.score,
19 | seo: $.seo?.score
20 | };
21 | } catch (e) {
22 | return {};
23 | }
24 | ''';
25 |
26 | WITH latest_technologies AS (
27 | SELECT
28 | category,
29 | app
30 | FROM
31 | `httparchive.technologies.2022_04_01_mobile`
32 | ), geo_summary AS (
33 | SELECT
34 | CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\d{4})(\d{2})', r'\1-\2-01') AS DATE) AS date,
35 | * EXCEPT (country_code),
36 | `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo
37 | FROM
38 | `chrome-ux-report.materialized.country_summary`
39 | UNION ALL
40 | SELECT
41 | * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin),
42 | 'ALL' AS geo
43 | FROM
44 | `chrome-ux-report.materialized.device_summary`
45 | ), crux AS (
46 | SELECT
47 | date,
48 | geo,
49 | CASE _rank
50 | WHEN 100000000 THEN 'ALL'
51 | WHEN 10000000 THEN 'Top 10M'
52 | WHEN 1000000 THEN 'Top 1M'
53 | WHEN 100000 THEN 'Top 100k'
54 | WHEN 10000 THEN 'Top 10k'
55 | WHEN 1000 THEN 'Top 1k'
56 | END AS rank,
57 | CONCAT(origin, '/') AS url,
58 | IF(device = 'desktop', 'desktop', 'mobile') AS client,
59 |
60 | # CWV
61 | IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid,
62 | IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid,
63 | IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls,
64 | IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls,
65 | IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp,
66 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp,
67 |
68 | (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND
69 | IS_GOOD(small_cls, medium_cls, large_cls) AND
70 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024,
71 |
72 | (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND
73 | IS_GOOD(small_cls, medium_cls, large_cls) AND
74 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023,
75 |
76 | # WV
77 | IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp,
78 | IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp,
79 | IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb,
80 | IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb,
81 | IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp,
82 | IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp
83 | FROM
84 | geo_summary,
85 | UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank
86 | WHERE
87 | date >= '2020-01-01' AND
88 | device IN ('desktop', 'phone') AND
89 | (rank <= _rank OR (rank IS NULL AND _rank = 100000000))
90 | ), technologies AS (
91 | SELECT DISTINCT
92 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date,
93 | app,
94 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client,
95 | url
96 | FROM
97 | `httparchive.technologies.202*`
98 | WHERE
99 | app IS NOT NULL AND
100 | app != ''
101 | UNION ALL
102 | SELECT
103 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date,
104 | 'ALL' AS app,
105 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client,
106 | url
107 | FROM
108 | `httparchive.summary_pages.202*`
109 | ), categories AS (
110 | SELECT
111 | app,
112 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
113 | FROM
114 | latest_technologies
115 | GROUP BY
116 | app
117 | UNION ALL
118 | SELECT
119 | 'ALL' AS app,
120 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
121 | FROM
122 | latest_technologies
123 | ), summary_stats AS (
124 | SELECT
125 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date,
126 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client,
127 | url,
128 | bytesTotal,
129 | bytesJS,
130 | bytesImg
131 | FROM
132 | `httparchive.summary_pages.202*`
133 | ), lighthouse AS (
134 | SELECT
135 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date,
136 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client,
137 | url,
138 | GET_LIGHTHOUSE_CATEGORY_SCORES(JSON_QUERY(report, '$.categories')) AS lighthouse_category
139 | FROM
140 | `httparchive.lighthouse.202*`
141 | )
142 |
143 |
144 | SELECT
145 | date,
146 | geo,
147 | rank,
148 | ANY_VALUE(category) AS category,
149 | app,
150 | client,
151 | COUNT(0) AS origins,
152 |
153 | # CrUX data
154 | COUNTIF(good_fid) AS origins_with_good_fid,
155 | COUNTIF(good_cls) AS origins_with_good_cls,
156 | COUNTIF(good_lcp) AS origins_with_good_lcp,
157 | COUNTIF(good_fcp) AS origins_with_good_fcp,
158 | COUNTIF(good_ttfb) AS origins_with_good_ttfb,
159 | COUNTIF(good_inp) AS origins_with_good_inp,
160 | COUNTIF(any_fid) AS origins_with_any_fid,
161 | COUNTIF(any_cls) AS origins_with_any_cls,
162 | COUNTIF(any_lcp) AS origins_with_any_lcp,
163 | COUNTIF(any_fcp) AS origins_with_any_fcp,
164 | COUNTIF(any_ttfb) AS origins_with_any_ttfb,
165 | COUNTIF(any_inp) AS origins_with_any_inp,
166 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv,
167 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024,
168 | COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023,
169 | COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
170 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv,
171 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2024,
172 | SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023,
173 |
174 | # Lighthouse data
175 | APPROX_QUANTILES(lighthouse_category.accessibility, 1000)[OFFSET(500)] AS median_lighthouse_score_accessibility,
176 | APPROX_QUANTILES(lighthouse_category.best_practices, 1000)[OFFSET(500)] AS median_lighthouse_score_best_practices,
177 | APPROX_QUANTILES(lighthouse_category.performance, 1000)[OFFSET(500)] AS median_lighthouse_score_performance,
178 | APPROX_QUANTILES(lighthouse_category.pwa, 1000)[OFFSET(500)] AS median_lighthouse_score_pwa,
179 | APPROX_QUANTILES(lighthouse_category.seo, 1000)[OFFSET(500)] AS median_lighthouse_score_seo,
180 |
181 | # Page weight stats
182 | APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS median_bytes_total,
183 | APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS median_bytes_js,
184 | APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS median_bytes_image
185 |
186 | FROM
187 | technologies
188 | JOIN
189 | categories
190 | USING
191 | (app)
192 | JOIN
193 | summary_stats
194 | USING
195 | (date, client, url)
196 | LEFT JOIN
197 | lighthouse
198 | USING
199 | (date, client, url)
200 | JOIN
201 | crux
202 | USING
203 | (date, client, url)
204 | GROUP BY
205 | date,
206 | rank,
207 | geo,
208 | app,
209 | client
210 |
--------------------------------------------------------------------------------
/sql/monthly.sql:
--------------------------------------------------------------------------------
1 | # UPDATE THIS EACH MONTH
2 | DECLARE _YYYYMMDD DATE DEFAULT '2024-02-01';
3 |
4 |
5 | CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
6 | SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75
7 | );
8 |
9 | CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
10 | good + needs_improvement + poor > 0
11 | );
12 |
13 | CREATE TEMP FUNCTION GET_LIGHTHOUSE_CATEGORY_SCORES(categories STRING)
14 | RETURNS STRUCT
15 | LANGUAGE js AS '''
16 | try {
17 | const $ = JSON.parse(categories);
18 | return {
19 | accessibility: $.accessibility?.score,
20 | best_practices: $['best-practices']?.score,
21 | performance: $.performance?.score,
22 | pwa: $.pwa?.score,
23 | seo: $.seo?.score
24 | };
25 | } catch (e) {
26 | return {};
27 | }
28 | ''';
29 |
30 | INSERT INTO
31 | `httparchive.core_web_vitals.technologies`
32 |
33 | WITH geo_summary AS (
34 | SELECT
35 | CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\d{4})(\d{2})', r'\1-\2-01') AS DATE) AS date,
36 | * EXCEPT (country_code),
37 | `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo
38 | FROM
39 | `chrome-ux-report.materialized.country_summary`
40 | WHERE
41 | yyyymm = CAST(FORMAT_DATE('%Y%m', _YYYYMMDD) AS INT64) AND
42 | device IN ('desktop', 'phone')
43 | UNION ALL
44 | SELECT
45 | * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin),
46 | 'ALL' AS geo
47 | FROM
48 | `chrome-ux-report.materialized.device_summary`
49 | WHERE
50 | date = _YYYYMMDD AND
51 | device IN ('desktop', 'phone')
52 | ),
53 |
54 | crux AS (
55 | SELECT
56 | geo,
57 | CASE _rank
58 | WHEN 100000000 THEN 'ALL'
59 | WHEN 10000000 THEN 'Top 10M'
60 | WHEN 1000000 THEN 'Top 1M'
61 | WHEN 100000 THEN 'Top 100k'
62 | WHEN 10000 THEN 'Top 10k'
63 | WHEN 1000 THEN 'Top 1k'
64 | END AS rank,
65 | CONCAT(origin, '/') AS root_page_url,
66 | IF(device = 'desktop', 'desktop', 'mobile') AS client,
67 |
68 | # CWV
69 | IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid,
70 | IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid,
71 | IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls,
72 | IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls,
73 | IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp,
74 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp,
75 |
76 | (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND
77 | IS_GOOD(small_cls, medium_cls, large_cls) AND
78 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024,
79 |
80 | (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND
81 | IS_GOOD(small_cls, medium_cls, large_cls) AND
82 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023,
83 |
84 | # WV
85 | IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp,
86 | IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp,
87 | IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb,
88 | IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb,
89 | IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp,
90 | IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp
91 | FROM
92 | geo_summary,
93 | UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank
94 | WHERE
95 | rank <= _rank
96 | ),
97 |
98 | technologies AS (
99 | SELECT
100 | technology.technology AS app,
101 | client,
102 | page AS url
103 | FROM
104 | `httparchive.all.pages`,
105 | UNNEST(technologies) AS technology
106 | WHERE
107 | date = _YYYYMMDD AND
108 | technology.technology IS NOT NULL AND
109 | technology.technology != ''
110 | UNION ALL
111 | SELECT
112 | 'ALL' AS app,
113 | client,
114 | page AS url
115 | FROM
116 | `httparchive.all.pages`
117 | WHERE
118 | date = _YYYYMMDD
119 | ),
120 |
121 | categories AS (
122 | SELECT
123 | technology.technology AS app,
124 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
125 | FROM
126 | `httparchive.all.pages`,
127 | UNNEST(technologies) AS technology,
128 | UNNEST(technology.categories) AS category
129 | WHERE
130 | date = _YYYYMMDD
131 | GROUP BY
132 | app
133 | UNION ALL
134 | SELECT
135 | 'ALL' AS app,
136 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
137 | FROM
138 | `httparchive.all.pages`,
139 | UNNEST(technologies) AS technology,
140 | UNNEST(technology.categories) AS category
141 | WHERE
142 | date = _YYYYMMDD AND
143 | client = 'mobile'
144 | ),
145 |
146 | summary_stats AS (
147 | SELECT
148 | client,
149 | page AS url,
150 | root_page AS root_page_url,
151 | CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal,
152 | CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS,
153 | CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg,
154 | GET_LIGHTHOUSE_CATEGORY_SCORES(JSON_QUERY(lighthouse, '$.categories')) AS lighthouse_category
155 | FROM
156 | `httparchive.all.pages`
157 | WHERE
158 | date = _YYYYMMDD
159 | ),
160 |
161 | lab_data AS (
162 | SELECT
163 | client,
164 | root_page_url,
165 | app,
166 | ANY_VALUE(category) AS category,
167 | CAST(AVG(bytesTotal) AS INT64) AS bytesTotal,
168 | CAST(AVG(bytesJS) AS INT64) AS bytesJS,
169 | CAST(AVG(bytesImg) AS INT64) AS bytesImg,
170 | CAST(AVG(lighthouse_category.accessibility) AS NUMERIC) AS accessibility,
171 | CAST(AVG(lighthouse_category.best_practices) AS NUMERIC) AS best_practices,
172 | CAST(AVG(lighthouse_category.performance) AS NUMERIC) AS performance,
173 | CAST(AVG(lighthouse_category.pwa) AS NUMERIC) AS pwa,
174 | CAST(AVG(lighthouse_category.seo) AS NUMERIC) AS seo
175 | FROM
176 | summary_stats
177 | JOIN
178 | technologies
179 | USING
180 | (client, url)
181 | JOIN
182 | categories
183 | USING
184 | (app)
185 | GROUP BY
186 | client,
187 | root_page_url,
188 | app
189 | )
190 |
191 |
192 | SELECT
193 | _YYYYMMDD AS date,
194 | geo,
195 | rank,
196 | ANY_VALUE(category) AS category,
197 | app,
198 | client,
199 | COUNT(0) AS origins,
200 |
201 | # CrUX data
202 | COUNTIF(good_fid) AS origins_with_good_fid,
203 | COUNTIF(good_cls) AS origins_with_good_cls,
204 | COUNTIF(good_lcp) AS origins_with_good_lcp,
205 | COUNTIF(good_fcp) AS origins_with_good_fcp,
206 | COUNTIF(good_ttfb) AS origins_with_good_ttfb,
207 | COUNTIF(good_inp) AS origins_with_good_inp,
208 | COUNTIF(any_fid) AS origins_with_any_fid,
209 | COUNTIF(any_cls) AS origins_with_any_cls,
210 | COUNTIF(any_lcp) AS origins_with_any_lcp,
211 | COUNTIF(any_fcp) AS origins_with_any_fcp,
212 | COUNTIF(any_ttfb) AS origins_with_any_ttfb,
213 | COUNTIF(any_inp) AS origins_with_any_inp,
214 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv,
215 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024,
216 | COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023,
217 | COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
218 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv,
219 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2024,
220 | SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023,
221 |
222 | # Lighthouse data
223 | APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS median_lighthouse_score_accessibility,
224 | APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS median_lighthouse_score_best_practices,
225 | APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS median_lighthouse_score_performance,
226 | APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS median_lighthouse_score_pwa,
227 | APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS median_lighthouse_score_seo,
228 |
229 | # Page weight stats
230 | APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS median_bytes_total,
231 | APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS median_bytes_js,
232 | APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS median_bytes_image
233 |
234 | FROM
235 | lab_data
236 | JOIN
237 | crux
238 | USING
239 | (client, root_page_url)
240 | GROUP BY
241 | app,
242 | geo,
243 | rank,
244 | client
245 |
--------------------------------------------------------------------------------