├── .github └── FUNDING.yml ├── .gitignore ├── README.md ├── opportunities ├── 01_initialization.sql ├── 02_aggregation.sql ├── 03_impact.sql ├── 04_opportunities.sql ├── README.md └── lighthouse_cwv.js └── sql ├── all.sql └── monthly.sql /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: httparchive 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **This repository is deprecated.** Please see [HTTPArchive/dataform](https://github.com/HTTPArchive/dataform) for the next version of CWV Tech Report data pipeline. 2 | 3 | # Core Web Vitals Technology Report 4 | 5 | image 6 | 7 | Public dashboard: https://cwvtech.report/ 8 | 9 | Methodology: https://discuss.httparchive.org/t/new-dashboard-the-core-web-vitals-technology-report/2178 10 | -------------------------------------------------------------------------------- /opportunities/01_initialization.sql: -------------------------------------------------------------------------------- 1 | # Note: you'll need to manually configure this query in the BigQuery UI to overwrite the contents of the `httparchive.scratchspace.lh_cwv` table. 2 | 3 | CREATE TEMP FUNCTION IS_INTERESTING(app STRING) RETURNS BOOL AS ( 4 | app IN ( 5 | # JavaScript frameworks and libraries 6 | 'React', 'Next.js', 'Angular', 'Vue.js', 'Nuxt.js', 'Svelte', 'SvelteKit', 'Preact', 7 | # CMSs 8 | 'WordPress', 'Drupal', 'Joomla', 'Wix', 'Squarespace', 'Duda', 'Shopify', 'Elementor') 9 | ); 10 | 11 | # This content is generated from lighthouse_cwv.js. 12 | CREATE TEMP FUNCTION GET_AUDITS(lhr STRING) RETURNS ARRAY> AS ( 13 | [ 14 | STRUCT('FID' AS metric, 'third-party-summary' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-summary".score') AS FLOAT64) >= 0.9 AS passing), 15 | STRUCT('FID' AS metric, 'third-party-facades' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-facades".score') AS FLOAT64) >= 0.9 AS passing), 16 | STRUCT('FID' AS metric, 'bootup-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."bootup-time".score') AS FLOAT64) >= 0.9 AS passing), 17 | STRUCT('FID' AS metric, 'mainthread-work-breakdown' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."mainthread-work-breakdown".score') AS FLOAT64) >= 0.9 AS passing), 18 | STRUCT('FID' AS metric, 'dom-size' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."dom-size".score') AS FLOAT64) >= 0.9 AS passing), 19 | STRUCT('FID' AS metric, 'duplicated-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."duplicated-javascript".score') AS FLOAT64) >= 0.9 AS passing), 20 | STRUCT('FID' AS metric, 'legacy-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."legacy-javascript".score') AS FLOAT64) >= 0.9 AS passing), 21 | STRUCT('FID' AS metric, 'viewport' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."viewport".score') AS FLOAT64) >= 0.9 AS passing), 22 | STRUCT('LCP' AS metric, 'server-response-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."server-response-time".score') AS FLOAT64) >= 0.9 AS passing), 23 | STRUCT('LCP' AS metric, 'render-blocking-resources' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."render-blocking-resources".score') AS FLOAT64) >= 0.9 AS passing), 24 | STRUCT('LCP' AS metric, 'redirects' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."redirects".score') AS FLOAT64) >= 0.9 AS passing), 25 | STRUCT('LCP' AS metric, 'uses-text-compression' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-text-compression".score') AS FLOAT64) >= 0.9 AS passing), 26 | STRUCT('LCP' AS metric, 'uses-rel-preconnect' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preconnect".score') AS FLOAT64) >= 0.9 AS passing), 27 | STRUCT('LCP' AS metric, 'uses-rel-preload' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preload".score') AS FLOAT64) >= 0.9 AS passing), 28 | STRUCT('LCP' AS metric, 'font-display' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."font-display".score') AS FLOAT64) >= 0.9 AS passing), 29 | STRUCT('LCP' AS metric, 'unminified-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-javascript".score') AS FLOAT64) >= 0.9 AS passing), 30 | STRUCT('LCP' AS metric, 'unminified-css' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-css".score') AS FLOAT64) >= 0.9 AS passing), 31 | STRUCT('LCP' AS metric, 'unused-css-rules' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-css-rules".score') AS FLOAT64) >= 0.9 AS passing), 32 | STRUCT('LCP' AS metric, 'preload-lcp-image' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."preload-lcp-image".score') AS FLOAT64) >= 0.9 AS passing), 33 | STRUCT('LCP' AS metric, 'unused-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-javascript".score') AS FLOAT64) >= 0.9 AS passing), 34 | STRUCT('LCP' AS metric, 'efficient-animated-content' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."efficient-animated-content".score') AS FLOAT64) >= 0.9 AS passing), 35 | STRUCT('LCP' AS metric, 'total-byte-weight' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."total-byte-weight".score') AS FLOAT64) >= 0.9 AS passing), 36 | STRUCT('LCP' AS metric, 'uses-optimized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-optimized-images".score') AS FLOAT64) >= 0.9 AS passing), 37 | STRUCT('LCP' AS metric, 'uses-responsive-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-responsive-images".score') AS FLOAT64) >= 0.9 AS passing), 38 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing), 39 | STRUCT('CLS' AS metric, 'unsized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unsized-images".score') AS FLOAT64) >= 0.9 AS passing) 40 | ] 41 | ); 42 | 43 | CREATE TEMP FUNCTION GET_PCT_GOOD_LCP(har STRING) RETURNS NUMERIC AS ( 44 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.largest_contentful_paint.percentiles.p75') AS NUMERIC) 45 | ); 46 | CREATE TEMP FUNCTION GET_PCT_GOOD_CLS(har STRING) RETURNS NUMERIC AS ( 47 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.cumulative_layout_shift.percentiles.p75') AS NUMERIC) 48 | ); 49 | CREATE TEMP FUNCTION GET_PCT_GOOD_FID(har STRING) RETURNS NUMERIC AS ( 50 | SAFE_CAST(JSON_VALUE(har, '$._CrUX.metrics.first_input_delay.percentiles.p75') AS NUMERIC) 51 | ); 52 | 53 | CREATE TEMP FUNCTION GET_LAB_LCP(har STRING) RETURNS NUMERIC AS ( 54 | CAST(JSON_VALUE(har, '$."_chromeUserTiming.LargestContentfulPaint"') AS NUMERIC) 55 | ); 56 | CREATE TEMP FUNCTION GET_LAB_CLS(har STRING) RETURNS NUMERIC AS ( 57 | CAST(JSON_VALUE(har, '$."_chromeUserTiming.CumulativeLayoutShift"') AS NUMERIC) 58 | ); 59 | CREATE TEMP FUNCTION GET_LAB_TBT(har STRING) RETURNS NUMERIC AS ( 60 | CAST(JSON_VALUE(har, '$."_TotalBlockingTime"') AS NUMERIC) 61 | ); 62 | 63 | CREATE TEMP FUNCTION GET_CWV(har STRING) RETURNS ARRAY> AS ( 64 | [ 65 | STRUCT('field' AS env, 'LCP' AS metric, GET_PCT_GOOD_LCP(har) AS value), 66 | STRUCT('field' AS env, 'FID' AS metric, GET_PCT_GOOD_FID(har) AS value), 67 | STRUCT('field' AS env, 'CLS' AS metric, GET_PCT_GOOD_CLS(har) AS value), 68 | STRUCT('lab' AS env, 'LCP' AS metric, GET_LAB_LCP(har) AS value), 69 | STRUCT('lab' AS env, 'FID' AS metric, GET_LAB_TBT(har) AS value), 70 | STRUCT('lab' AS env, 'CLS' AS metric, GET_LAB_CLS(har) AS value) 71 | ] 72 | ); 73 | 74 | CREATE TEMP FUNCTION CALC_PCT_DIFF(before NUMERIC, after NUMERIC) RETURNS NUMERIC AS ( 75 | SAFE_DIVIDE(after - before, before) 76 | ); 77 | 78 | WITH technologies AS ( 79 | SELECT DISTINCT 80 | url, 81 | app AS technology 82 | FROM 83 | `httparchive.technologies.2022_05_01_mobile` 84 | WHERE 85 | IS_INTERESTING(app) 86 | ), audits AS ( 87 | SELECT 88 | url, 89 | metric, 90 | audit, 91 | after.passing AS improved 92 | FROM ( 93 | SELECT 94 | url, 95 | audit.* 96 | FROM 97 | `httparchive.lighthouse.2021_04_01_mobile`, 98 | UNNEST(GET_AUDITS(report)) AS audit 99 | UNION ALL 100 | SELECT 101 | url, 102 | audit.* 103 | FROM 104 | `httparchive.lighthouse.2021_06_01_mobile`, 105 | UNNEST([ 106 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(report, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing) 107 | ]) AS audit) AS before 108 | JOIN ( 109 | SELECT 110 | url, 111 | audit.* 112 | FROM 113 | `httparchive.lighthouse.2022_05_01_mobile`, 114 | UNNEST(GET_AUDITS(report)) AS audit) AS after 115 | USING 116 | (url, metric, audit) 117 | WHERE 118 | before.passing != after.passing AND 119 | before.passing IS NOT NULL AND 120 | after.passing IS NOT NULL 121 | ), cwv AS ( 122 | SELECT 123 | url, 124 | env, 125 | metric, 126 | before.value AS before, 127 | after.value AS after, 128 | CALC_PCT_DIFF(before.value, after.value) AS pct_diff 129 | FROM ( 130 | SELECT 131 | url, 132 | env, 133 | metric, 134 | value 135 | FROM 136 | `httparchive.pages.2021_04_01_mobile`, 137 | UNNEST(GET_CWV(payload))) AS before 138 | JOIN ( 139 | SELECT 140 | url, 141 | env, 142 | metric, 143 | value 144 | FROM 145 | `httparchive.pages.2022_05_01_mobile`, 146 | UNNEST(GET_CWV(payload))) AS after 147 | USING 148 | (url, env, metric) 149 | ) 150 | 151 | SELECT 152 | * 153 | FROM 154 | technologies 155 | JOIN 156 | audits 157 | USING 158 | (url) 159 | JOIN 160 | cwv 161 | USING 162 | (url, metric) 163 | WHERE 164 | pct_diff IS NOT NULL 165 | -------------------------------------------------------------------------------- /opportunities/02_aggregation.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE TABLE `httparchive.scratchspace.lh_cwv_stats` AS 2 | SELECT 3 | percentile, 4 | technology, 5 | metric, 6 | audit, 7 | env, 8 | improved, 9 | COUNT(0) AS pages, 10 | APPROX_QUANTILES(pct_diff, 1000)[OFFSET(percentile * 10)] AS pct_diff 11 | FROM 12 | `httparchive.scratchspace.lh_cwv`, 13 | UNNEST(GENERATE_ARRAY(1, 100, 1)) AS percentile 14 | GROUP BY 15 | percentile, 16 | technology, 17 | metric, 18 | audit, 19 | env, 20 | improved 21 | -------------------------------------------------------------------------------- /opportunities/03_impact.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE TABLE `httparchive.scratchspace.lh_cwv_stats2` AS 2 | SELECT 3 | technology, 4 | metric, 5 | audit, 6 | env, 7 | passed.pages + failed.pages AS pages, 8 | passed.pct_diff - failed.pct_diff AS estimated_passing_impact_pct 9 | FROM ( 10 | SELECT 11 | technology, 12 | metric, 13 | audit, 14 | env, 15 | pages, 16 | pct_diff 17 | FROM 18 | `httparchive.scratchspace.lh_cwv_stats` 19 | WHERE 20 | percentile = 50 AND 21 | NOT improved) AS failed 22 | JOIN ( 23 | SELECT 24 | technology, 25 | metric, 26 | audit, 27 | env, 28 | pages, 29 | pct_diff 30 | FROM 31 | `httparchive.scratchspace.lh_cwv_stats` 32 | WHERE 33 | percentile = 50 AND 34 | improved) AS passed 35 | USING 36 | (technology, metric, audit, env) 37 | ORDER BY 38 | pages DESC 39 | -------------------------------------------------------------------------------- /opportunities/04_opportunities.sql: -------------------------------------------------------------------------------- 1 | CREATE TEMP FUNCTION IS_INTERESTING(app STRING) RETURNS BOOL AS ( 2 | app IN ( 3 | # JavaScript frameworks and libraries 4 | 'React', 'Next.js', 'Angular', 'Vue.js', 'Nuxt.js', 'Svelte', 'SvelteKit', 'Preact', 5 | # CMSs 6 | 'WordPress', 'Drupal', 'Joomla', 'Wix', 'Squarespace', 'Duda', 'Shopify', 'Elementor') 7 | ); 8 | 9 | CREATE TEMP FUNCTION GET_AUDITS(lhr STRING) RETURNS ARRAY> AS ( 10 | [ 11 | STRUCT('FID' AS metric, 'third-party-summary' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-summary".score') AS FLOAT64) >= 0.9 AS passing), 12 | STRUCT('FID' AS metric, 'third-party-facades' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."third-party-facades".score') AS FLOAT64) >= 0.9 AS passing), 13 | STRUCT('FID' AS metric, 'bootup-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."bootup-time".score') AS FLOAT64) >= 0.9 AS passing), 14 | STRUCT('FID' AS metric, 'mainthread-work-breakdown' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."mainthread-work-breakdown".score') AS FLOAT64) >= 0.9 AS passing), 15 | STRUCT('FID' AS metric, 'dom-size' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."dom-size".score') AS FLOAT64) >= 0.9 AS passing), 16 | STRUCT('FID' AS metric, 'duplicated-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."duplicated-javascript".score') AS FLOAT64) >= 0.9 AS passing), 17 | STRUCT('FID' AS metric, 'legacy-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."legacy-javascript".score') AS FLOAT64) >= 0.9 AS passing), 18 | STRUCT('FID' AS metric, 'viewport' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."viewport".score') AS FLOAT64) >= 0.9 AS passing), 19 | STRUCT('LCP' AS metric, 'server-response-time' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."server-response-time".score') AS FLOAT64) >= 0.9 AS passing), 20 | STRUCT('LCP' AS metric, 'render-blocking-resources' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."render-blocking-resources".score') AS FLOAT64) >= 0.9 AS passing), 21 | STRUCT('LCP' AS metric, 'redirects' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."redirects".score') AS FLOAT64) >= 0.9 AS passing), 22 | STRUCT('LCP' AS metric, 'uses-text-compression' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-text-compression".score') AS FLOAT64) >= 0.9 AS passing), 23 | STRUCT('LCP' AS metric, 'uses-rel-preconnect' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preconnect".score') AS FLOAT64) >= 0.9 AS passing), 24 | STRUCT('LCP' AS metric, 'uses-rel-preload' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-rel-preload".score') AS FLOAT64) >= 0.9 AS passing), 25 | STRUCT('LCP' AS metric, 'font-display' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."font-display".score') AS FLOAT64) >= 0.9 AS passing), 26 | STRUCT('LCP' AS metric, 'unminified-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-javascript".score') AS FLOAT64) >= 0.9 AS passing), 27 | STRUCT('LCP' AS metric, 'unminified-css' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unminified-css".score') AS FLOAT64) >= 0.9 AS passing), 28 | STRUCT('LCP' AS metric, 'unused-css-rules' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-css-rules".score') AS FLOAT64) >= 0.9 AS passing), 29 | STRUCT('LCP' AS metric, 'preload-lcp-image' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."preload-lcp-image".score') AS FLOAT64) >= 0.9 AS passing), 30 | STRUCT('LCP' AS metric, 'unused-javascript' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unused-javascript".score') AS FLOAT64) >= 0.9 AS passing), 31 | STRUCT('LCP' AS metric, 'efficient-animated-content' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."efficient-animated-content".score') AS FLOAT64) >= 0.9 AS passing), 32 | STRUCT('LCP' AS metric, 'total-byte-weight' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."total-byte-weight".score') AS FLOAT64) >= 0.9 AS passing), 33 | STRUCT('LCP' AS metric, 'uses-optimized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-optimized-images".score') AS FLOAT64) >= 0.9 AS passing), 34 | STRUCT('LCP' AS metric, 'uses-responsive-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."uses-responsive-images".score') AS FLOAT64) >= 0.9 AS passing), 35 | STRUCT('LCP' AS metric, 'modern-image-formats' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."modern-image-formats".score') AS FLOAT64) >= 0.9 AS passing), 36 | STRUCT('CLS' AS metric, 'unsized-images' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."unsized-images".score') AS FLOAT64) >= 0.9 AS passing) 37 | ] 38 | ); 39 | 40 | WITH audits_with_impact AS ( 41 | SELECT DISTINCT 42 | technology, 43 | audit 44 | FROM 45 | `httparchive.scratchspace.lh_cwv_stats` 46 | ), technologies AS ( 47 | SELECT DISTINCT 48 | url, 49 | app AS technology, 50 | COUNT(DISTINCT url) OVER (PARTITION BY app) AS total 51 | FROM 52 | `httparchive.technologies.2022_05_01_mobile` 53 | WHERE 54 | IS_INTERESTING(app) 55 | ), ranks AS ( 56 | SELECT 57 | url, 58 | CASE rank 59 | WHEN 1000 THEN 'head' 60 | WHEN 10000 THEN 'torso' 61 | WHEN 100000 THEN 'torso' 62 | WHEN 1000000 THEN 'torso' 63 | ELSE 'tail' 64 | END AS rank 65 | FROM 66 | `httparchive.summary_pages.2022_05_01_mobile` 67 | ), failing_audits AS ( 68 | SELECT 69 | audit.audit, 70 | url 71 | FROM 72 | `httparchive.lighthouse.2022_05_01_mobile`, 73 | UNNEST(GET_AUDITS(report)) AS audit 74 | WHERE 75 | NOT passing 76 | ), totals_per_rank_technology AS ( 77 | SELECT 78 | technology, 79 | rank, 80 | COUNT(DISTINCT url) AS total_per_rank 81 | FROM 82 | ranks 83 | JOIN 84 | technologies 85 | USING 86 | (url) 87 | GROUP BY 88 | technology, 89 | rank 90 | ) 91 | 92 | SELECT 93 | technology, 94 | audit, 95 | rank, 96 | COUNT(0) AS failing, 97 | ANY_VALUE(total) AS total_technology, 98 | COUNT(0) / ANY_VALUE(total) AS pct_failing_technology, 99 | ANY_VALUE(total_per_rank) AS total_per_rank, 100 | COUNT(0) / ANY_VALUE(total_per_rank) AS pct_failing_per_technology_rank 101 | FROM 102 | audits_with_impact 103 | JOIN 104 | technologies 105 | USING 106 | (technology) 107 | JOIN 108 | failing_audits 109 | USING 110 | (audit, url) 111 | JOIN 112 | ranks 113 | USING 114 | (url) 115 | JOIN 116 | totals_per_rank_technology 117 | USING 118 | (technology, rank) 119 | GROUP BY 120 | technology, 121 | audit, 122 | rank 123 | ORDER BY 124 | total_technology DESC 125 | -------------------------------------------------------------------------------- /opportunities/README.md: -------------------------------------------------------------------------------- 1 | # CWV opportunities 2 | 3 | The purpose of this project is to supplement the CWV Technology Report with a dashboard to assess the opportunities for each technology. 4 | 5 | - Stage 1 initializes the data for the particular technologies/audits of interest 6 | - lighthouse_cwv.js generates the corresponding SQL for the audit map 7 | - Stage 2 aggregates the results from Stage 1 into discrete percentiles 8 | - Stage 3 calculates the estimated impact from the medians in Stage 2 9 | - Stage 4 calculates the remaining opportunity based on audit failure rates 10 | -------------------------------------------------------------------------------- /opportunities/lighthouse_cwv.js: -------------------------------------------------------------------------------- 1 | // This script assumes that there exists a `lhr` object containing a Lighthouse Report. 2 | // It generates the SQL needed to extract the before/after status for each audit, used by 01_initialization.sql. 3 | 4 | lhr.categories.performance.auditRefs.filter(ar => { 5 | return ['TBT', 'LCP', 'CLS'].includes(ar.acronym); 6 | }).map(ar => { 7 | return [ar.acronym, ar.relevantAudits]; 8 | }).map(([metric, ras]) => { 9 | return [ 10 | metric, 11 | ras.map(ra => { 12 | return lhr.audits[ra]; 13 | }).filter(a => { 14 | return a.scoreDisplayMode != 'informative'; 15 | }) 16 | ]; 17 | }).flatMap(([metric, audits]) => { 18 | return audits.map(a => { 19 | return `STRUCT('${metric == 'TBT' ? 'FID' : metric}' AS metric, '${a.id}' AS audit, SAFE_CAST(JSON_VALUE(lhr, '$.audits."${a.id}".score') AS FLOAT64) >= 0.9 AS passing)`; 20 | }); 21 | }).join(',\n'); 22 | -------------------------------------------------------------------------------- /sql/all.sql: -------------------------------------------------------------------------------- 1 | CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( 2 | SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75 3 | ); 4 | 5 | CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( 6 | good + needs_improvement + poor > 0 7 | ); 8 | 9 | CREATE TEMP FUNCTION GET_LIGHTHOUSE_CATEGORY_SCORES(categories STRING) 10 | RETURNS STRUCT 11 | LANGUAGE js AS ''' 12 | try { 13 | const $ = JSON.parse(categories); 14 | return { 15 | accessibility: $.accessibility?.score, 16 | 'best-practices': $['best-practices']?.score, 17 | performance: $.performance?.score, 18 | pwa: $.pwa?.score, 19 | seo: $.seo?.score 20 | }; 21 | } catch (e) { 22 | return {}; 23 | } 24 | '''; 25 | 26 | WITH latest_technologies AS ( 27 | SELECT 28 | category, 29 | app 30 | FROM 31 | `httparchive.technologies.2022_04_01_mobile` 32 | ), geo_summary AS ( 33 | SELECT 34 | CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\d{4})(\d{2})', r'\1-\2-01') AS DATE) AS date, 35 | * EXCEPT (country_code), 36 | `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo 37 | FROM 38 | `chrome-ux-report.materialized.country_summary` 39 | UNION ALL 40 | SELECT 41 | * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin), 42 | 'ALL' AS geo 43 | FROM 44 | `chrome-ux-report.materialized.device_summary` 45 | ), crux AS ( 46 | SELECT 47 | date, 48 | geo, 49 | CASE _rank 50 | WHEN 100000000 THEN 'ALL' 51 | WHEN 10000000 THEN 'Top 10M' 52 | WHEN 1000000 THEN 'Top 1M' 53 | WHEN 100000 THEN 'Top 100k' 54 | WHEN 10000 THEN 'Top 10k' 55 | WHEN 1000 THEN 'Top 1k' 56 | END AS rank, 57 | CONCAT(origin, '/') AS url, 58 | IF(device = 'desktop', 'desktop', 'mobile') AS client, 59 | 60 | # CWV 61 | IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid, 62 | IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid, 63 | IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls, 64 | IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls, 65 | IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp, 66 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp, 67 | 68 | (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND 69 | IS_GOOD(small_cls, medium_cls, large_cls) AND 70 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024, 71 | 72 | (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND 73 | IS_GOOD(small_cls, medium_cls, large_cls) AND 74 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023, 75 | 76 | # WV 77 | IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp, 78 | IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp, 79 | IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb, 80 | IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb, 81 | IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp, 82 | IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp 83 | FROM 84 | geo_summary, 85 | UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank 86 | WHERE 87 | date >= '2020-01-01' AND 88 | device IN ('desktop', 'phone') AND 89 | (rank <= _rank OR (rank IS NULL AND _rank = 100000000)) 90 | ), technologies AS ( 91 | SELECT DISTINCT 92 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date, 93 | app, 94 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client, 95 | url 96 | FROM 97 | `httparchive.technologies.202*` 98 | WHERE 99 | app IS NOT NULL AND 100 | app != '' 101 | UNION ALL 102 | SELECT 103 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date, 104 | 'ALL' AS app, 105 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client, 106 | url 107 | FROM 108 | `httparchive.summary_pages.202*` 109 | ), categories AS ( 110 | SELECT 111 | app, 112 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category 113 | FROM 114 | latest_technologies 115 | GROUP BY 116 | app 117 | UNION ALL 118 | SELECT 119 | 'ALL' AS app, 120 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category 121 | FROM 122 | latest_technologies 123 | ), summary_stats AS ( 124 | SELECT 125 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date, 126 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client, 127 | url, 128 | bytesTotal, 129 | bytesJS, 130 | bytesImg 131 | FROM 132 | `httparchive.summary_pages.202*` 133 | ), lighthouse AS ( 134 | SELECT 135 | CAST(REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d)_(\d{2})_(\d{2}).*', r'202\1-\2-\3') AS DATE) AS date, 136 | IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client, 137 | url, 138 | GET_LIGHTHOUSE_CATEGORY_SCORES(JSON_QUERY(report, '$.categories')) AS lighthouse_category 139 | FROM 140 | `httparchive.lighthouse.202*` 141 | ) 142 | 143 | 144 | SELECT 145 | date, 146 | geo, 147 | rank, 148 | ANY_VALUE(category) AS category, 149 | app, 150 | client, 151 | COUNT(0) AS origins, 152 | 153 | # CrUX data 154 | COUNTIF(good_fid) AS origins_with_good_fid, 155 | COUNTIF(good_cls) AS origins_with_good_cls, 156 | COUNTIF(good_lcp) AS origins_with_good_lcp, 157 | COUNTIF(good_fcp) AS origins_with_good_fcp, 158 | COUNTIF(good_ttfb) AS origins_with_good_ttfb, 159 | COUNTIF(good_inp) AS origins_with_good_inp, 160 | COUNTIF(any_fid) AS origins_with_any_fid, 161 | COUNTIF(any_cls) AS origins_with_any_cls, 162 | COUNTIF(any_lcp) AS origins_with_any_lcp, 163 | COUNTIF(any_fcp) AS origins_with_any_fcp, 164 | COUNTIF(any_ttfb) AS origins_with_any_ttfb, 165 | COUNTIF(any_inp) AS origins_with_any_inp, 166 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv, 167 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024, 168 | COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023, 169 | COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv, 170 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv, 171 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2024, 172 | SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023, 173 | 174 | # Lighthouse data 175 | APPROX_QUANTILES(lighthouse_category.accessibility, 1000)[OFFSET(500)] AS median_lighthouse_score_accessibility, 176 | APPROX_QUANTILES(lighthouse_category.best_practices, 1000)[OFFSET(500)] AS median_lighthouse_score_best_practices, 177 | APPROX_QUANTILES(lighthouse_category.performance, 1000)[OFFSET(500)] AS median_lighthouse_score_performance, 178 | APPROX_QUANTILES(lighthouse_category.pwa, 1000)[OFFSET(500)] AS median_lighthouse_score_pwa, 179 | APPROX_QUANTILES(lighthouse_category.seo, 1000)[OFFSET(500)] AS median_lighthouse_score_seo, 180 | 181 | # Page weight stats 182 | APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS median_bytes_total, 183 | APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS median_bytes_js, 184 | APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS median_bytes_image 185 | 186 | FROM 187 | technologies 188 | JOIN 189 | categories 190 | USING 191 | (app) 192 | JOIN 193 | summary_stats 194 | USING 195 | (date, client, url) 196 | LEFT JOIN 197 | lighthouse 198 | USING 199 | (date, client, url) 200 | JOIN 201 | crux 202 | USING 203 | (date, client, url) 204 | GROUP BY 205 | date, 206 | rank, 207 | geo, 208 | app, 209 | client 210 | -------------------------------------------------------------------------------- /sql/monthly.sql: -------------------------------------------------------------------------------- 1 | # UPDATE THIS EACH MONTH 2 | DECLARE _YYYYMMDD DATE DEFAULT '2024-02-01'; 3 | 4 | 5 | CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( 6 | SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75 7 | ); 8 | 9 | CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( 10 | good + needs_improvement + poor > 0 11 | ); 12 | 13 | CREATE TEMP FUNCTION GET_LIGHTHOUSE_CATEGORY_SCORES(categories STRING) 14 | RETURNS STRUCT 15 | LANGUAGE js AS ''' 16 | try { 17 | const $ = JSON.parse(categories); 18 | return { 19 | accessibility: $.accessibility?.score, 20 | best_practices: $['best-practices']?.score, 21 | performance: $.performance?.score, 22 | pwa: $.pwa?.score, 23 | seo: $.seo?.score 24 | }; 25 | } catch (e) { 26 | return {}; 27 | } 28 | '''; 29 | 30 | INSERT INTO 31 | `httparchive.core_web_vitals.technologies` 32 | 33 | WITH geo_summary AS ( 34 | SELECT 35 | CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\d{4})(\d{2})', r'\1-\2-01') AS DATE) AS date, 36 | * EXCEPT (country_code), 37 | `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo 38 | FROM 39 | `chrome-ux-report.materialized.country_summary` 40 | WHERE 41 | yyyymm = CAST(FORMAT_DATE('%Y%m', _YYYYMMDD) AS INT64) AND 42 | device IN ('desktop', 'phone') 43 | UNION ALL 44 | SELECT 45 | * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin), 46 | 'ALL' AS geo 47 | FROM 48 | `chrome-ux-report.materialized.device_summary` 49 | WHERE 50 | date = _YYYYMMDD AND 51 | device IN ('desktop', 'phone') 52 | ), 53 | 54 | crux AS ( 55 | SELECT 56 | geo, 57 | CASE _rank 58 | WHEN 100000000 THEN 'ALL' 59 | WHEN 10000000 THEN 'Top 10M' 60 | WHEN 1000000 THEN 'Top 1M' 61 | WHEN 100000 THEN 'Top 100k' 62 | WHEN 10000 THEN 'Top 10k' 63 | WHEN 1000 THEN 'Top 1k' 64 | END AS rank, 65 | CONCAT(origin, '/') AS root_page_url, 66 | IF(device = 'desktop', 'desktop', 'mobile') AS client, 67 | 68 | # CWV 69 | IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid, 70 | IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid, 71 | IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls, 72 | IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls, 73 | IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp, 74 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp, 75 | 76 | (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND 77 | IS_GOOD(small_cls, medium_cls, large_cls) AND 78 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024, 79 | 80 | (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND 81 | IS_GOOD(small_cls, medium_cls, large_cls) AND 82 | IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023, 83 | 84 | # WV 85 | IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp, 86 | IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp, 87 | IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb, 88 | IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb, 89 | IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp, 90 | IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp 91 | FROM 92 | geo_summary, 93 | UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank 94 | WHERE 95 | rank <= _rank 96 | ), 97 | 98 | technologies AS ( 99 | SELECT 100 | technology.technology AS app, 101 | client, 102 | page AS url 103 | FROM 104 | `httparchive.all.pages`, 105 | UNNEST(technologies) AS technology 106 | WHERE 107 | date = _YYYYMMDD AND 108 | technology.technology IS NOT NULL AND 109 | technology.technology != '' 110 | UNION ALL 111 | SELECT 112 | 'ALL' AS app, 113 | client, 114 | page AS url 115 | FROM 116 | `httparchive.all.pages` 117 | WHERE 118 | date = _YYYYMMDD 119 | ), 120 | 121 | categories AS ( 122 | SELECT 123 | technology.technology AS app, 124 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category 125 | FROM 126 | `httparchive.all.pages`, 127 | UNNEST(technologies) AS technology, 128 | UNNEST(technology.categories) AS category 129 | WHERE 130 | date = _YYYYMMDD 131 | GROUP BY 132 | app 133 | UNION ALL 134 | SELECT 135 | 'ALL' AS app, 136 | ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category 137 | FROM 138 | `httparchive.all.pages`, 139 | UNNEST(technologies) AS technology, 140 | UNNEST(technology.categories) AS category 141 | WHERE 142 | date = _YYYYMMDD AND 143 | client = 'mobile' 144 | ), 145 | 146 | summary_stats AS ( 147 | SELECT 148 | client, 149 | page AS url, 150 | root_page AS root_page_url, 151 | CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, 152 | CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, 153 | CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, 154 | GET_LIGHTHOUSE_CATEGORY_SCORES(JSON_QUERY(lighthouse, '$.categories')) AS lighthouse_category 155 | FROM 156 | `httparchive.all.pages` 157 | WHERE 158 | date = _YYYYMMDD 159 | ), 160 | 161 | lab_data AS ( 162 | SELECT 163 | client, 164 | root_page_url, 165 | app, 166 | ANY_VALUE(category) AS category, 167 | CAST(AVG(bytesTotal) AS INT64) AS bytesTotal, 168 | CAST(AVG(bytesJS) AS INT64) AS bytesJS, 169 | CAST(AVG(bytesImg) AS INT64) AS bytesImg, 170 | CAST(AVG(lighthouse_category.accessibility) AS NUMERIC) AS accessibility, 171 | CAST(AVG(lighthouse_category.best_practices) AS NUMERIC) AS best_practices, 172 | CAST(AVG(lighthouse_category.performance) AS NUMERIC) AS performance, 173 | CAST(AVG(lighthouse_category.pwa) AS NUMERIC) AS pwa, 174 | CAST(AVG(lighthouse_category.seo) AS NUMERIC) AS seo 175 | FROM 176 | summary_stats 177 | JOIN 178 | technologies 179 | USING 180 | (client, url) 181 | JOIN 182 | categories 183 | USING 184 | (app) 185 | GROUP BY 186 | client, 187 | root_page_url, 188 | app 189 | ) 190 | 191 | 192 | SELECT 193 | _YYYYMMDD AS date, 194 | geo, 195 | rank, 196 | ANY_VALUE(category) AS category, 197 | app, 198 | client, 199 | COUNT(0) AS origins, 200 | 201 | # CrUX data 202 | COUNTIF(good_fid) AS origins_with_good_fid, 203 | COUNTIF(good_cls) AS origins_with_good_cls, 204 | COUNTIF(good_lcp) AS origins_with_good_lcp, 205 | COUNTIF(good_fcp) AS origins_with_good_fcp, 206 | COUNTIF(good_ttfb) AS origins_with_good_ttfb, 207 | COUNTIF(good_inp) AS origins_with_good_inp, 208 | COUNTIF(any_fid) AS origins_with_any_fid, 209 | COUNTIF(any_cls) AS origins_with_any_cls, 210 | COUNTIF(any_lcp) AS origins_with_any_lcp, 211 | COUNTIF(any_fcp) AS origins_with_any_fcp, 212 | COUNTIF(any_ttfb) AS origins_with_any_ttfb, 213 | COUNTIF(any_inp) AS origins_with_any_inp, 214 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv, 215 | COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024, 216 | COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023, 217 | COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv, 218 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv, 219 | SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2024, 220 | SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023, 221 | 222 | # Lighthouse data 223 | APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS median_lighthouse_score_accessibility, 224 | APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS median_lighthouse_score_best_practices, 225 | APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS median_lighthouse_score_performance, 226 | APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS median_lighthouse_score_pwa, 227 | APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS median_lighthouse_score_seo, 228 | 229 | # Page weight stats 230 | APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS median_bytes_total, 231 | APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS median_bytes_js, 232 | APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS median_bytes_image 233 | 234 | FROM 235 | lab_data 236 | JOIN 237 | crux 238 | USING 239 | (client, root_page_url) 240 | GROUP BY 241 | app, 242 | geo, 243 | rank, 244 | client 245 | --------------------------------------------------------------------------------