├── .editorconfig
├── .gitignore
├── site
│   ├── thumb.png
│   ├── icons
│   │   ├── fonts
│   │   │   ├── icomoon.eot
│   │   │   ├── icomoon.ttf
│   │   │   ├── icomoon.woff
│   │   │   └── icomoon.svg
│   │   └── style.css
│   ├── colors.css
│   ├── logo.svg
│   ├── style.css
│   ├── index.html
│   └── index.js
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       ├── test.yml
│       ├── status.yml
│       └── deploy.yml
├── cli.js
├── .babelrc
├── .gitmodules
├── lib
│   ├── datetime.test.js
│   ├── transform.test.js
│   ├── __test_utils__
│   │   └── fs.js
│   ├── parse.js
│   ├── cliArgs.js
│   ├── stringify.js
│   ├── rules.js
│   ├── parse.test.js
│   ├── rating.js
│   ├── README.md
│   ├── caching.js
│   ├── fs.js
│   ├── fetch.test.js
│   ├── datetime.js
│   ├── caching.test.js
│   ├── transform.js
│   ├── fetch.js
│   └── fs.test.js
├── .eslintrc.json
├── index.js
├── tasks
│   ├── writeData.js
│   ├── findPopulations.js
│   ├── findFeatures.js
│   └── scrapeData.js
├── LICENSE
├── tools
│   ├── copySpectrumCSS.sh
│   ├── mapJHUCounties.js
│   └── generateColors.js
├── scripts
│   └── statusSlackBot.js
├── package.json
├── timeseries.js
└── README.md

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | quote_type = single
3 | max_line_length = 500

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | cache/*
2 | dist/
3 | node_modules
4 | .DS_Store
5 | .vscode

--------------------------------------------------------------------------------
/site/thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wikunia/coronadatascraper/master/site/thumb.png

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | patreon: lazd
4 | github: lazd

--------------------------------------------------------------------------------
/cli.js:
--------------------------------------------------------------------------------
1 | import generate from './index.js';
2 | import argv from './lib/cliArgs.js';
3 |
4 | generate(argv.date, argv);

--------------------------------------------------------------------------------
/site/icons/fonts/icomoon.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wikunia/coronadatascraper/master/site/icons/fonts/icomoon.eot

--------------------------------------------------------------------------------
/site/icons/fonts/icomoon.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wikunia/coronadatascraper/master/site/icons/fonts/icomoon.ttf

--------------------------------------------------------------------------------
/site/icons/fonts/icomoon.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wikunia/coronadatascraper/master/site/icons/fonts/icomoon.woff

--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 |   "presets": [
3 |     [
4 |       "@babel/preset-env",
5 |       {
6 |         "targets": {
7 |           "node": "current"
8 |         }
9 |       }
10 |     ]
11 |   ]
12 | }

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "coronavirus-data-sources"]
2 | 	path = coronavirus-data-sources
3 | 	url = https://github.com/lazd/coronavirus-data-sources.git
4 | [submodule "coronadatascraper-cache"]
5 | 	path = coronadatascraper-cache
6 | 	url = https://github.com/lazd/coronadatascraper-cache.git

--------------------------------------------------------------------------------
/lib/datetime.test.js:
--------------------------------------------------------------------------------
1 | import * as datetime from './datetime.js';
2 |
3 | describe('Datetime functions', () => {
4 |   describe('scrapeDateIs', () => {
5 |     test('matches environment var', () => {
6 |       process.env.SCRAPE_DATE = '2020-3-15';
7 |       expect(datetime.scrapeDateIs('2020-3-15')).toBe(true);
8 |     });
9 |   });
10 | });

--------------------------------------------------------------------------------
/site/colors.css:
--------------------------------------------------------------------------------
1 | .spectrum {
2 | --Gray33: #ffffff;
3 | --Gray67: #fafbfc;
4 | --Gray100: #f3f5f6;
5 | --Gray200: #e8ebed;
6 | --Gray300: #dde2e5;
7 | --Gray400: #c3cbd2;
8 | --Gray500: #a9b5be;
9 | --Gray600: #7f909e;
10 | --Gray700: #5c7384;
11 | --Gray800: #334e62;
12 | --Gray900: #1e2e39;
13 | --Blue100: #6c961d;
14 | --Blue200: #007b63;
15 | --Blue300: #005772;
16 | --Blue400: #1f3354;
17 | }

--------------------------------------------------------------------------------
/lib/transform.test.js:
--------------------------------------------------------------------------------
1 | import each from 'jest-each';
2 |
3 | import * as transform from './transform.js';
4 |
5 | describe('Transform functions', () => {
6 |   describe('toISO3166Alpha3', () => {
7 |     const tests = [['France', 'FRA']];
8 |     each(tests).test('when passed "%s", it returns "%s"', (str, expected) => {
9 |       expect(transform.toISO3166Alpha3(str)).toBe(expected);
10 |     });
11 |   });
12 | });

--------------------------------------------------------------------------------
/lib/__test_utils__/fs.js:
--------------------------------------------------------------------------------
1 | import fsMock from 'mock-fs';
2 |
3 | let logsTemp = [];
4 | let logMock;
5 |
6 | export const mock = config => {
7 |   logMock = jest.spyOn(console, 'log').mockImplementation((...args) => {
8 |     logsTemp.push(args);
9 |   });
10 |   fsMock(config);
11 | };
12 |
13 | export const restore = () => {
14 |   logMock.mockRestore();
15 |   fsMock.restore();
16 |   logsTemp.forEach(el => console.log(...el));
17 |   logsTemp = [];
18 | };

--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": ["airbnb-base", "plugin:jest/recommended", "plugin:jest/style", "plugin:prettier/recommended"],
3 |   "parser": "babel-eslint",
4 |   "rules": {
5 |     "jest/no-standalone-expect": "warn",
6 |     "no-async-promise-executor": "warn",
7 |     "no-await-in-loop": "warn",
8 |     "no-console": "off",
9 |     "no-continue": "warn",
10 |     "no-labels": "warn",
11 |     "no-param-reassign": "warn",
12 |     "no-plusplus": "off",
13 |     "no-restricted-syntax": "off",
14 |     "no-shadow": "warn",
15 |     "import/extensions": ["error", "ignorePackages"],
16 |     "no-underscore-dangle": "off",
17 |     "jest/no-try-expect": "off"
18 |   }
19 | }

--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | import scrapeData from './tasks/scrapeData.js';
2 | import findFeatures from './tasks/findFeatures.js';
3 | import findPopulations from './tasks/findPopulations.js';
4 | import writeData from './tasks/writeData.js';
5 |
6 | async function generate(date, options = { findFeatures: true, findPopulations: true, writeData: true, skip: null, location: null }) {
7 |   if (date) {
8 |     process.env.SCRAPE_DATE = date;
9 |   } else {
10 |     delete process.env.SCRAPE_DATE;
11 |   }
12 |
13 |   // JSON used for reporting
14 |   const report = {
15 |     date
16 |   };
17 |
18 |   // Non-function arguments to .then() are ignored, so a step disabled via options is skipped
19 |   const output = await scrapeData({ report, options })
20 |     .then(options.findFeatures !== false && findFeatures)
21 |     .then(options.findPopulations !== false && findPopulations)
22 |     .then(options.writeData !== false && writeData);
23 |
24 |   return output;
25 | }
26 |
27 | export default generate;

--------------------------------------------------------------------------------
/lib/parse.js:
--------------------------------------------------------------------------------
1 | /*
2 |   Turn the provided string into a number, ignoring non-numeric data
3 | */
4 | function number(str) {
5 |   if (typeof str === 'number') {
6 |     return str;
7 |   }
8 |   if (str === '') {
9 |     return 0;
10 |   }
11 |   return parseInt(str.replace(/[^\d-]/g, ''), 10);
12 | }
13 |
14 | /*
15 |   Turn the provided string into a floating point number
16 | */
17 | function float(str) {
18 |   if (str === '') {
19 |     return 0;
20 |   }
21 |   return parseFloat(str.replace(/[^\d.-]/g, ''));
22 | }
23 |
24 | /*
25 |   Remove line breaks, double spaces, zero-width space, asterisk, and trim the provided string
26 | */
27 | function string(str) {
28 |   return str
29 |     .replace(/\n/g, ' ')
30 |     .replace(/\s+/g, ' ')
31 |     .replace(/\u200B/g, '')
32 |     .replace(/\*/g, '')
33 |     .trim();
34 | }
35 |
36 | export { number, float, string };

--------------------------------------------------------------------------------
/tasks/writeData.js:
--------------------------------------------------------------------------------
1 | import path from 'path';
2 | import * as fs from '../lib/fs.js';
3 | import * as stringify from '../lib/stringify.js';
4 |
5 | const writeData = async ({ locations, featureCollection, report, options, sourceRatings }) => {
6 |   let suffix = '';
7 |   if (options.outputSuffix !== undefined) {
8 |     suffix = options.outputSuffix;
9 |   } else if (process.env.SCRAPE_DATE) {
10 |     suffix = `-${process.env.SCRAPE_DATE}`;
11 |   }
12 |
13 |   await fs.writeFile(path.join('dist', `data${suffix}.json`), JSON.stringify(locations, null, 2));
14 |
15 |   await fs.writeCSV(path.join('dist', `data${suffix}.csv`), stringify.csvForDay(locations));
16 |
17 |   await fs.writeJSON(path.join('dist', `features${suffix}.json`), featureCollection);
18 |
19 |   await fs.writeJSON('dist/report.json', report);
20 |
21 |   await fs.writeJSON('dist/ratings.json', sourceRatings);
22 |
23 |   return { locations, featureCollection, report, options };
24 | };
25 |
26 | export default writeData;

--------------------------------------------------------------------------------
/lib/cliArgs.js:
--------------------------------------------------------------------------------
1 | import yargs from 'yargs';
2 |
3 | const { argv } = yargs
4 |   .option('date', {
5 |     alias: 'd',
6 |     description: 'Generate data for or start the timeseries at the provided date in YYYY-M-D format',
7 |     type: 'string'
8 |   })
9 |   .option('endDate', {
10 |     alias: 'e',
11 |     description: 'The date after which to stop generating timeseries data',
12 |     type: 'string'
13 |   })
14 |   .option('location', {
15 |     alias: 'l',
16 |     description: 'Scrape only the location provided by full name, e.g. City, County, State, Country',
17 |     type: 'string'
18 |   })
19 |   .option('skip', {
20 |     alias: 's',
21 |     description: 'Skip the location provided by full name, e.g. City, County, State, Country',
22 |     type: 'string'
23 |   })
24 |   .option('outputSuffix', {
25 |     alias: 'o',
26 |     description: 'The suffix to add to output files, e.g. passing TEST will produce data-TEST.json etc',
27 |     type: 'string'
28 |   })
29 |   .help()
30 |   .alias('help', 'h');
31 |
32 | export default argv;

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test CI
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 |   build:
7 |     runs-on: [macOS-latest]
8 |     steps:
9 |       - uses: actions/checkout@v1 # without submodules
10 |         with:
11 |           persist-credentials: false
12 |       - name: Disable the keychain credential helper
13 |         run: git config --global credential.helper ""
14 |       - name: Enable the local store credential helper
15 |         run: git config --global --add credential.helper store
16 |       - name: Add credential
17 |         run: echo "https://x-access-token:${{ secrets.GIT_SECRET }}@github.com" >> ~/.git-credentials
18 |       - name: Tell git to use https instead of ssh whenever it encounters it
19 |         run: 'git config --global url."https://github.com/".insteadof git@github.com:'
20 |       - name: Update submodules
21 |         run: |
22 |           git submodule update --init --recursive
23 |       - name: Use Node.js 13.x
24 |         uses: actions/setup-node@v1
25 |         with:
26 |           node-version: '13.x'
27 |       - name: Install
28 |         run: |
29 |           yarn
30 |       - name: Test
31 |         run: |
32 |           yarn test
33 |       - name: Lint
34 |         run: |
35 |           yarn lint

--------------------------------------------------------------------------------
/site/icons/style.css:
--------------------------------------------------------------------------------
1 | @font-face {
2 |   font-family: 'icomoon';
3 |   src: url('fonts/icomoon.eot?s1jly5');
4 |   src: url('fonts/icomoon.eot?s1jly5#iefix') format('embedded-opentype'),
5 |     url('fonts/icomoon.ttf?s1jly5') format('truetype'),
6 |     url('fonts/icomoon.woff?s1jly5') format('woff'),
7 |     url('fonts/icomoon.svg?s1jly5#icomoon') format('svg');
8 |   font-weight: normal;
9 |   font-style: normal;
10 |   font-display: block;
11 | }
12 |
13 | [class^="icon-"], [class*=" icon-"] {
14 |   /* use !important to prevent issues with browser extensions that change fonts */
15 |   font-family: 'icomoon' !important;
16 |   speak: none;
17 |   font-style: normal;
18 |   font-weight: normal;
19 |   font-variant: normal;
20 |   text-transform: none;
21 |   line-height: 1;
22 |
23 |   /* Better Font Rendering =========== */
24 |   -webkit-font-smoothing: antialiased;
25 |   -moz-osx-font-smoothing: grayscale;
26 | }
27 |
28 | .icon-menu:before {
29 |   content: "\e905";
30 | }
31 | .icon-file:before {
32 |   content: "\e904";
33 | }
34 | .icon-code:before {
35 |   content: "\e903";
36 | }
37 | .icon-view:before {
38 |   content: "\e900";
39 | }
40 | .icon-search:before {
41 |   content: "\e901";
42 | }
43 | .icon-download:before {
44 |   content: "\e902";
45 | }

--------------------------------------------------------------------------------
/lib/stringify.js:
--------------------------------------------------------------------------------
1 | /*
2 |   Generate a CSV from the given day
3 | */
4 | function csvForDay(data) {
5 |   // Start with the columns we want first
6 |   let columns = ['city', 'county', 'state', 'country', 'cases', 'deaths', 'recovered', 'tested', 'active', 'population', 'lat', 'long', 'url'];
7 |
8 |   // Get list of columns
9 |   for (const location of data) {
10 |     for (const column in location) {
11 |       if (columns.indexOf(column) === -1) {
12 |         columns.push(column);
13 |       }
14 |     }
15 |   }
16 |
17 |   // Drop coordinates
18 |   columns = columns.filter(column => column !== 'coordinates');
19 |
20 |   // Turn data into arrays
21 |   const csvData = [columns];
22 |   for (const location of data) {
23 |     const row = [];
24 |     for (const column of columns) {
25 |       // Output lat and long instead
26 |       if (column === 'lat' && location.coordinates) {
27 |         row.push(location.coordinates[1]);
28 |       } else if (column === 'long' && location.coordinates) {
29 |         row.push(location.coordinates[0]);
30 |       } else {
31 |         row.push(location[column]);
32 |       }
33 |     }
34 |     csvData.push(row);
35 |   }
36 |   return csvData;
37 | }
38 |
39 | // eslint-disable-next-line import/prefer-default-export
40 | export { csvForDay };

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020, Lawrence Davis
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5 |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 |
9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/lib/rules.js:
--------------------------------------------------------------------------------
1 | /*
2 |   Given a data object and arrays of criteria with keys/values that match the data object,
3 |   determine if the object is acceptable.
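  For example (illustrative criteria): acceptCriteria [{ country: 'USA', state: 'CA' }] accepts
  only objects carrying both of those key/value pairs, while rejectCriteria [{ county: '(unassigned)' }]
  rejects any object whose county is '(unassigned)'.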
4 |
5 |   If any key/value pair in any rejection criteria matches the object, it is rejected
6 |   If the object has keys that match everything in at least one acceptance criteria, it's accepted
7 | */
8 | function isAcceptable(data, acceptCriteria, rejectCriteria) {
9 |   // Fail things that match reject criteria
10 |   if (rejectCriteria) {
11 |     for (const [, criteria] of Object.entries(rejectCriteria)) {
12 |       for (const [prop, value] of Object.entries(criteria)) {
13 |         if (data[prop] === value) {
14 |           return false;
15 |         }
16 |       }
17 |     }
18 |   }
19 |
20 |   // Accept things that pass at least one accept criteria
21 |   if (acceptCriteria) {
22 |     for (const [, criteria] of Object.entries(acceptCriteria)) {
23 |       let criteriaMatch = true;
24 |       for (const [prop, value] of Object.entries(criteria)) {
25 |         if (data[prop] !== value) {
26 |           criteriaMatch = false;
27 |           break;
28 |         }
29 |       }
30 |       if (criteriaMatch) {
31 |         return true;
32 |       }
33 |     }
34 |     return false;
35 |   }
36 |
37 |   return true;
38 | }
39 |
40 | // eslint-disable-next-line import/prefer-default-export
41 | export { isAcceptable };

--------------------------------------------------------------------------------
/tools/copySpectrumCSS.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | CSSFILES="
4 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-global.css
5 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-darkest.css
6 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-lightest.css
7 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-dark.css
8 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-light.css
9 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-medium.css
10 | node_modules/@adobe/spectrum-css/dist/components/vars/spectrum-large.css
11 | node_modules/@adobe/spectrum-css/dist/components/icon/index-vars.css
12 | node_modules/@adobe/spectrum-css/dist/components/statuslight/index-vars.css
13 | node_modules/@adobe/spectrum-css/dist/components/link/index-vars.css
14 | node_modules/@adobe/spectrum-css/dist/components/page/index-vars.css
15 | node_modules/@adobe/spectrum-css/dist/components/site/index-vars.css
16 | node_modules/@adobe/spectrum-css/dist/components/typography/index-vars.css
17 | node_modules/@adobe/spectrum-css/dist/components/button/index-vars.css
18 | node_modules/@adobe/spectrum-css/dist/components/buttongroup/index-vars.css
19 | node_modules/@adobe/spectrum-css/dist/components/sidenav/index-vars.css
20 | "
21 |
22 | mkdir -p dist/
23 | cat ${CSSFILES[@]} > dist/spectrum-css.css

--------------------------------------------------------------------------------
/.github/workflows/status.yml:
--------------------------------------------------------------------------------
1 | name: Scraper Status CI
2 |
3 | on:
4 |   schedule:
5 |     - cron: '0 */2 * * *' # Runs every 2 hours
6 |
7 | jobs:
8 |   status:
9 |     runs-on: [macOS-latest]
10 |     steps:
11 |       - uses: actions/checkout@v1 # without submodules
12 |         with:
13 |           persist-credentials: false
14 |       - name: Disable the keychain credential helper
15 |         run: git config --global credential.helper ""
16 |       - name: Enable the local store credential helper
17 |         run: git config --global --add credential.helper store
18 |       - name: Add credential
19 |         run: echo "https://x-access-token:${{ secrets.GIT_SECRET }}@github.com" >> ~/.git-credentials
20 |       - name: Tell git to use https instead of ssh whenever it encounters it
21 |         run: 'git config --global url."https://github.com/".insteadof git@github.com:'
22 |       - name: Update submodules
23 |         run: |
24 |           git submodule update --init --recursive
25 |       - name: Use Node.js 13.x
26 |         uses: actions/setup-node@v1
27 |         with:
28 |           node-version: '13.x'
29 |       - name: Install
30 |         run: |
31 |           yarn
32 |       - name: Build
33 |         run: |
34 |           yarn start
35 |       - name: Notify Slack
36 |         env:
37 |           SLACK_STATUS_HOOK: ${{ secrets.SLACK_STATUS_HOOK }}
38 |         run: |
39 |           node ./scripts/statusSlackBot.js send ${SLACK_STATUS_HOOK}

--------------------------------------------------------------------------------
/tools/mapJHUCounties.js:
--------------------------------------------------------------------------------
1 | import turf from '@turf/turf';
2 | import * as fs from '../lib/fs.js';
3 |
4 | const LAT = 'Lat';
5 | const LONG = 'Long';
6 | const STATE = 'Province/State';
7 |
8 | async function findNameMapping() {
9 |   const jhuData = await fs.readCSV('cache/e20883430a9a4c7502d0a9618e49c1a9.csv');
10 |   const usCountyData = await fs.readJSON('./coronavirus-data-sources/geojson/usa-counties.json');
11 |
12 |   const countyMap = {};
13 |
14 |   // eslint-disable-next-line no-labels
15 |   locationLoop: for (const location of jhuData) {
16 |     if (!location[STATE] || location[STATE].indexOf(',') === -1) {
17 |       continue;
18 |     }
19 |
20 |     let point;
21 |     if (location[LAT] !== undefined && location[LONG] !== undefined) {
22 |       point = turf.point([location[LONG], location[LAT]]);
23 |     }
24 |
25 |     if (point) {
26 |       // Search within features
27 |       for (const feature of usCountyData.features) {
28 |         if (feature.geometry) {
29 |           const poly = turf.feature(feature.geometry);
30 |           if (turf.booleanPointInPolygon(point, poly)) {
31 |             countyMap[location[STATE]] = feature.properties.name;
32 |             // eslint-disable-next-line no-labels
33 |             continue locationLoop;
34 |           }
35 |         }
36 |       }
37 |     } else {
38 |       console.log('⚠️ No coordinates for %s', location[STATE]);
39 |     }
40 |   }
41 |
42 |   fs.writeJSON('./dist/jhuUSCountyMap.json', countyMap);
43 | }
44 |
45 | findNameMapping();

--------------------------------------------------------------------------------
/lib/parse.test.js:
--------------------------------------------------------------------------------
1 | import each from 'jest-each';
2 |
3 | import * as parse from './parse.js';
4 |
5 | describe('Parse functions', () => {
6 |   describe('number', () => {
7 |     const tests = [
8 |       ['', 0],
9 |       ['0', 0],
10 |       ['1', 1],
11 |       ['-1', -1],
12 |       ['10', 10],
13 |       ['1.1', 11],
14 |       ['-1.1', -11],
15 |       ['25abc', 25],
16 |       ['as-12asdasd', -12]
17 |     ];
18 |     each(tests).test('when passed "%s", it returns %d', (str, expected) => {
19 |       expect(parse.number(str)).toBe(expected);
20 |     });
21 |   });
22 |
23 |   describe('float', () => {
24 |     const tests = [
25 |       ['', 0],
26 |       ['0', 0],
27 |       ['1', 1],
28 |       ['-1', -1],
29 |       ['10', 10],
30 |       ['1.1', 1.1],
31 |       ['-1.1', -1.1],
32 |       ['25abc', 25],
33 |       ['as-12asdasd', -12]
34 |     ];
35 |     each(tests).test('when passed "%s", it returns %d', (str, expected) => {
36 |       expect(parse.float(str)).toBe(expected);
37 |     });
38 |   });
39 |
40 |   describe('string', () => {
41 |     const tests = [
42 |       ['', ''],
43 |       ['should not change', 'should not change'],
44 |       ['0', '0'],
45 |       ['****test', 'test'],
46 |       ['this is some text\nsecond line', 'this is some text second line'],
47 |       ['this  some  test', 'this some test'],
48 |       ['this is padded text ', 'this is padded text'],
padded text'] 50 | ]; 51 | each(tests).test('when passed "%s", it returns "%s"', (str, expected) => { 52 | expect(parse.string(str)).toBe(expected); 53 | }); 54 | }); 55 | }); 56 | -------------------------------------------------------------------------------- /tools/generateColors.js: -------------------------------------------------------------------------------- 1 | import { generateAdaptiveTheme } from '@adobe/leonardo-contrast-colors'; 2 | import * as fs from '../lib/fs.js'; 3 | // returns theme colors as JSON 4 | function generateColors() { 5 | const cdsTheme = generateAdaptiveTheme({ 6 | baseScale: 'Gray', 7 | colorScales: [ 8 | { 9 | name: 'Gray', 10 | colorKeys: ['#355166'], 11 | colorspace: 'RGB', 12 | ratios: [-1.2, -1.05, 1, 1.1, 1.2, 1.5, 1.93, 3.01, 4.54, 8, 12.81] 13 | }, 14 | { 15 | name: 'Blue', 16 | colorKeys: ['#21385e', '#829800', '#007e60', '#efef00'], 17 | colorspace: 'LCH', 18 | ratios: [3.2, 4.8, 7.4, 11.7] 19 | } 20 | ] 21 | }); 22 | 23 | const cdsThemeLight = cdsTheme(97); 24 | 25 | const varPrefix = '--'; 26 | const cssVariables = {}; 27 | 28 | // Iterate each color object 29 | // for (let i = 0; i < cdsThemeLight.length; i++) { 30 | for (const colorTheme of cdsThemeLight) { 31 | console.log(cdsThemeLight); 32 | if (colorTheme.values) { 33 | for (const value of colorTheme.values) { 34 | // output "name" of color and prefix 35 | const key = value.name; 36 | const prop = varPrefix.concat(key); 37 | // create CSS property with name and value 38 | cssVariables[prop] = value.value; 39 | } 40 | } 41 | } 42 | 43 | const cssArray = Object.entries(cssVariables).map(v => { 44 | return `${v.join(': ')};\n`; 45 | }); 46 | const cssString = cssArray.toString().replace(/,/g, ''); 47 | 48 | fs.writeFile('./site/colors.css', `.spectrum {\n${cssString}}\n`); 49 | } 50 | generateColors(); 51 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy CI 2 | 3 | on: 4 | schedule: 5 | - cron: '0 4 * * *' # Runs at 9 PM PST every day (4:00 UTC) 6 | 7 | jobs: 8 | build: 9 | runs-on: [macOS-latest] 10 | steps: 11 | - uses: actions/checkout@v1 # without submodules 12 | with: 13 | persist-credentials: false 14 | - name: Disable the keychain credential helper 15 | run: git config --global credential.helper "" 16 | - name: Enable the local store credential helper 17 | run: git config --global --add credential.helper store 18 | - name: Add credential 19 | run: echo "https://x-access-token:${{ secrets.GIT_SECRET }}@github.com" >> ~/.git-credentials 20 | - name: Tell git to use https instead of ssh whenever it encounters it 21 | run: 'git config --global url."https://github.com/".insteadof git@github.com:' 22 | - name: Update submodules 23 | run: | 24 | git submodule update --init --recursive 25 | - name: Use Node.js 13.x 26 | uses: actions/setup-node@v1 27 | with: 28 | node-version: '13.x' 29 | - name: Install 30 | run: | 31 | yarn 32 | - name: Test 33 | run: | 34 | yarn test 35 | - name: Build 36 | run: | 37 | yarn build 38 | yarn dateData 39 | - name: Deploy 40 | uses: JamesIves/github-pages-deploy-action@releases/v3 41 | with: 42 | ACCESS_TOKEN: ${{ secrets.GIT_SECRET }} 43 | BRANCH: gh-pages # The branch the action should deploy to. 44 | FOLDER: dist # The folder the action should deploy. 45 | - name: Update cache 46 | run: | 47 | cd coronadatascraper-cache 48 | git add . 
49 | git commit -m "Update cache following deployment" || echo "Nothing to push to cache" 50 | git push origin HEAD:master || echo "Nothing to push to cache" 51 | - name: Notify Slack 52 | env: 53 | SLACK_HOOK: ${{ secrets.SLACK_HOOK }} 54 | run: | 55 | curl -X POST -H 'Content-type: application/js' --data '{"text":"I just pushed a new release, go check it out at http://blog.lazd.net/coronadatascraper !"}' ${SLACK_HOOK} 56 | -------------------------------------------------------------------------------- /scripts/statusSlackBot.js: -------------------------------------------------------------------------------- 1 | import request from 'request'; 2 | import yargs from 'yargs'; 3 | 4 | import * as fs from '../lib/fs.js'; 5 | import * as datetime from '../lib/datetime.js'; 6 | 7 | const { argv } = yargs 8 | .scriptName('node ./scripts/statusSlackBot.js') 9 | .usage('$0 [args]') 10 | .command('send [hook]', 'sends report.json to provided Slack Hook', yargs => { 11 | yargs.positional('hook', { 12 | type: 'string' 13 | }); 14 | }) 15 | .help(); 16 | 17 | const generateReport = async report => { 18 | const { scrape, findFeatures, findPopulation } = report; 19 | 20 | return [ 21 | { 22 | type: 'section', 23 | text: { 24 | type: 'mrkdwn', 25 | text: `*Report for ${datetime.getDate().toUTCString()}:*` 26 | } 27 | }, 28 | { 29 | type: 'section', 30 | text: { 31 | type: 'mrkdwn', 32 | text: ` 33 | _Scrappers:_ 34 | - *${scrape.numCities}* cities 35 | - *${scrape.numCounties}* counties 36 | - *${scrape.numStates}* states 37 | - *${scrape.numCountries}* countries 38 | - *${scrape.numErrors}* scraper errors: 39 | ${scrape.errors.map(error => ` - ${error.name}: ${error.err}`).join('\n')}` 40 | } 41 | }, 42 | { 43 | type: 'section', 44 | text: { 45 | type: 'mrkdwn', 46 | text: ` 47 | _Features:_ 48 | - *${findFeatures.numFeaturesFound}* locations matched 49 | - *${findFeatures.missingFeatures.length}* locations with missing features` 50 | } 51 | }, 52 | { 53 | type: 'section', 54 | text: { 55 | type: 'mrkdwn', 56 | text: ` 57 | _Populations:_ 58 | - *${findPopulation.numLocationsWithPopulation}* locations matched 59 | - *${findPopulation.missingPopulations.length}* locations with missing populations` 60 | } 61 | }, 62 | { 63 | type: 'section', 64 | text: { 65 | type: 'mrkdwn', 66 | text: `Go to the data: http://blog.lazd.net/coronadatascraper` 67 | } 68 | } 69 | ]; 70 | }; 71 | 72 | const sendToSlack = async data => { 73 | request.post(argv.hook, { 74 | json: { 75 | blocks: data 76 | } 77 | }); 78 | }; 79 | 80 | fs.readJSON('./dist/report.json') 81 | .then(generateReport) 82 | .then(sendToSlack); 83 | -------------------------------------------------------------------------------- /lib/rating.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | 3 | /* 4 | Calculate the rating of a source 5 | info.type - the way the source presents data (see below) 6 | info.timeseries - true if timeseries provided, false if just latest 7 | info.$ - Each of the location fields defined by the source 8 | info.ssl - Whether the site has valid SSL (set to false if we have to work around it with certs) 9 | info.aggregate - String; one of city, county, state, country 10 | info.headless - Whether we need a headless browser to scrape this source 11 | */ 12 | function calculateRating(info) { 13 | const easeOfRead = { 14 | json: 1, 15 | csv: 1, 16 | table: 0.75, 17 | list: 0.5, 18 | paragraph: 0.25, 19 | pdf: 0, 20 | image: -1 21 | }; 22 | 23 | const timeseries = { 24 | 
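    // looked up below as timeseries[!!info.timeseries]; the boolean key coerces to 'false' / 'true'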
24 |     false: 0, // everyone's got the latest
25 |     true: 1.75 // but timeseries is worth a lot
26 |   };
27 |
28 |   const completeness = {
29 |     cases: 0.5,
30 |     tested: 1,
31 |     deaths: 1,
32 |     recovered: 1,
33 |     country: 0.5,
34 |     state: 0.5,
35 |     county: 1,
36 |     city: 0.5
37 |   };
38 |
39 |   const aggregateWorth = 1.5;
40 |   const headlessWorth = -0.5;
41 |   const sslWorth = 0.25;
42 |
43 |   // Give credit for completeness
44 |   let rating = 0;
45 |   for (const field in completeness) {
46 |     if (info[field] !== null && info[field] !== undefined) {
47 |       rating += completeness[field];
48 |     }
49 |   }
50 |
51 |   // Auto-detect JSON and CSV
52 |   if (!info.type && easeOfRead[path.extname(info.url).substr(1)]) {
53 |     info.type = path.extname(info.url).substr(1);
54 |   }
55 |
56 |   if (info.url.substr(0, 5) === 'https' && info.ssl !== false) {
57 |     info.ssl = true;
58 |     rating += sslWorth;
59 |   }
60 |
61 |   // Dock some points if we have to go headless
62 |   if (info.headless) {
63 |     rating += headlessWorth;
64 |   }
65 |
66 |   // Aggregate sources are gold
67 |   if (info.aggregate) {
68 |     rating += aggregateWorth;
69 |
70 |     // Give points for what that data contains (higher level should already be given above)
71 |     rating += completeness[info.aggregate];
72 |   }
73 |
74 |   // Assume it's a list
75 |   if (!info.type) {
76 |     info.type = 'list';
77 |   }
78 |   rating += easeOfRead[info.type];
79 |
80 |   rating += timeseries[!!info.timeseries];
81 |
82 |   // Calculate highest possible rating
83 |   const possible =
84 |     Object.values(completeness).reduce((a, v) => a + v, 0) +
85 |     timeseries.true +
86 |     aggregateWorth +
87 |     Object.values(easeOfRead)
88 |       .sort((a, b) => a - b)
89 |       .pop();
90 |
91 |   return rating / possible;
92 | }
93 |
94 | export default calculateRating;

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "coronadatascraper",
3 |   "version": "1.0.0",
4 |   "description": "A scraper for Coronavirus data",
5 |   "main": "cli.js",
6 |   "author": "Larry Davis ",
7 |   "license": "BSD-2-Clause",
8 |   "type": "module",
9 |   "engines": {
10 |     "node": ">=13.4.0"
11 |   },
12 |   "scripts": {
13 |     "dev": "yarn buildSite & watch 'yarn buildSite' site & yarn serveSite",
14 |     "serveSite": "npx live-server dist",
15 |     "build": "yarn start && yarn timeseries && yarn buildSite",
16 |     "buildSite": "mkdir -p dist/; cp -r site/* dist/ && ./tools/copySpectrumCSS.sh",
17 |     "dateData": "DATE=$(date +'%Y-%m-%d') && cp dist/data.json dist/data-$DATE.json && cp dist/features.json dist/features-$DATE.json && cp dist/data.csv dist/data-$DATE.csv",
18 |     "deploy": "npm run dateData && npm run deploySite",
19 |     "deploySite": "npm run buildSite && gh-pages -a -d dist/ -e .",
20 |     "lint": "eslint .",
21 |     "start": "NODE_OPTIONS='--insecure-http-parser' node cli.js",
22 |     "test": "jest",
23 |     "timeseries": "NODE_OPTIONS='--insecure-http-parser' node timeseries.js",
24 |     "update": "npm run updateModules && rm -rf cache/* && npm run start",
25 |     "updateModules": "git submodule update --remote",
26 |     "winStart": "set NODE_OPTIONS='--insecure-http-parser' && node cli.js"
27 |   },
28 |   "dependencies": {
29 |     "@adobe/leonardo-contrast-colors": "^1.0.0-alpha.4",
30 |     "@adobe/spectrum-css": "^2.18.0",
31 |     "@turf/turf": "^5.1.6",
32 |     "cheerio": "^1.0.0-rc.3",
33 |     "csv-parse": "^4.8.8",
34 |     "csv-stringify": "^5.3.6",
35 |     "editorconfig": "^0.15.3",
36 |     "gh-pages": "^2.2.0",
37 |     "jest-each": "^25.1.0",
38 |     "mock-fs": "^4.11.0",
"live-server": "^1.2.1", 40 | "needle": "^2.3.3", 41 | "puppeteer": "^2.1.1", 42 | "request": "^2.88.2", 43 | "watch": "^1.0.2", 44 | "yargs": "^15.3.1" 45 | }, 46 | "devDependencies": { 47 | "@babel/core": "^7.8.7", 48 | "@babel/preset-env": "^7.8.7", 49 | "babel-eslint": "^10.1.0", 50 | "babel-jest": "^25.1.0", 51 | "eslint": "^6.8.0", 52 | "eslint-config-airbnb-base": "^14.1.0", 53 | "eslint-config-prettier": "^6.10.0", 54 | "eslint-plugin-import": "^2.20.1", 55 | "eslint-plugin-jest": "^23.8.2", 56 | "eslint-plugin-prettier": "^3.1.2", 57 | "husky": "^4.2.3", 58 | "jest": "^25.1.0", 59 | "lint-staged": "^10.0.8", 60 | "prettier": "^1.19.1" 61 | }, 62 | "husky": { 63 | "hooks": { 64 | "pre-commit": "lint-staged", 65 | "pre-push": "npm run lint" 66 | } 67 | }, 68 | "lint-staged": { 69 | "*.js": [ 70 | "eslint --fix", 71 | "git add" 72 | ] 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /site/icons/fonts/icomoon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Generated by IcoMoon 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /lib/README.md: -------------------------------------------------------------------------------- 1 | # Library 2 | 3 | A number of functions are available in separate modules to help write parsers. 4 | 5 | ## `fetch` 6 | 7 | `import * as fetch from './lib/fetch.js'` 8 | 9 | 10 | #### `await fetch.page(url)` 11 | 12 | Load the webpage at the given URL and return a Cheerio object 13 | 14 | **Returns:** [Cheerio](https://cheerio.js.org/) object 15 | 16 | 17 | #### `await fetch.json(url)` 18 | 19 | Load and parse JSON from the given URL 20 | 21 | **Returns:** Object 22 | 23 | 24 | #### `await fetch.csv(url)` 25 | 26 | Load and parse CSV from the given URL 27 | 28 | **Returns:** Array 29 | 30 | 31 | #### `await fetch.fetch(url)` 32 | 33 | Fetch whatever is at the given URL. If the URL has been downloaded before, it will be loaded from `cache/$MD5.ext`. 
34 |
35 | **Returns:** String
36 |
37 | ## `fs`
38 |
39 | `import * as fs from './lib/fs.js'`
40 |
41 |
42 | #### `await fs.exists(filePath)`
43 |
44 | Check if a file exists
45 |
46 | **Returns:** Boolean
47 |
48 |
49 | #### `await fs.readFile(filePath)`
50 |
51 | Read a file
52 |
53 |
54 | #### `await fs.readJSON(filePath)`
55 |
56 | Read and parse JSON
57 |
58 |
59 | #### `await fs.readCSV(filePath)`
60 |
61 | Read and parse CSV
62 |
63 |
64 | #### `await fs.writeFile(filePath, data)`
65 |
66 | Write the given data to the given file path
67 |
68 |
69 | #### `await fs.writeJSON(filePath, data)`
70 |
71 | Write the given JSON to the given file path, formatted nicely
72 |
73 |
74 | ## `parse`
75 |
76 | `import * as parse from './lib/parse.js'`
77 |
78 | #### `parse.number(string)`
79 | Turn the provided string into a number, ignoring non-numeric data
80 |
81 | **Returns:** Number
82 |
83 |
84 | #### `parse.float(string)`
85 | Turn the provided string into a floating point number
86 |
87 | **Returns:** Float
88 |
89 |
90 | #### `parse.string(string)`
91 | Remove line breaks, double spaces, zero-width space, asterisk, and trim the provided string
92 |
93 | **Returns:** String
94 |
95 |
96 | ## `rules`
97 |
98 | `import * as rules from './lib/rules.js'`
99 |
100 | #### `rules.isAcceptable(data, acceptCriteria, rejectCriteria)`
101 |
102 | Given a data object and arrays of criteria with keys/values that match the data object,
103 | determine if the object is acceptable.
104 |
105 | If any key/value pair in any rejection criteria matches the object, it is rejected
106 | If the object has keys that match everything in at least one acceptance criteria, it's accepted
107 |
108 | ## `transform`
109 |
110 | `import * as transform from './lib/transform.js'`
111 |
112 | #### `transform.objectToArray(object)`
113 | Convert an object keyed on county name to an array
114 |
115 | **Returns:** Array
116 |
117 |
118 | #### `transform.addCounty(string)`
119 | Append ' County' to the end of a string, if not already present
120 |
121 | **Returns:** String
122 |
123 |
124 | #### `transform.getName(location)`
125 | Get the full name of a location
126 |
127 | **Returns:** String
128 |
129 |
130 | #### `transform.hash(string)`
131 | MD5 hash a given string
132 |
133 | **Returns:** String

--------------------------------------------------------------------------------
/lib/caching.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * This file contains the caching implementation. We provide caching to reduce strain on official data sources
3 |  * and to store changes to each source on a day-to-day basis.
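 * For example (illustrative URL): a resource fetched from https://example.com/data.csv for the
 * scrape date 2020-3-16 is cached at coronadatascraper-cache/2020-3-16/<md5 of the URL>.csv,
 * while timeseries sources (date === false) live under cache/<md5 of the URL>.csv.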
4 |  */
5 |
6 | import path from 'path';
7 | import crypto from 'crypto';
8 |
9 | import * as datetime from './datetime.js';
10 | import * as fs from './fs.js';
11 |
12 | const DEFAULT_CACHE_PATH = 'coronadatascraper-cache';
13 | const TIMESERIES_CACHE_PATH = 'cache';
14 |
15 | export const CACHE_MISS = null;
16 | export const RESOURCE_UNAVAILABLE = undefined;
17 |
18 | /**
19 |   MD5 hash a given string
20 | */
21 | const hash = str => {
22 |   return crypto
23 |     .createHash('md5')
24 |     .update(str)
25 |     .digest('hex');
26 | };
27 |
28 | /**
29 |  * Get the path of cache for the given URL at the given date
30 |  * @param {*} url URL of the cached resource
31 |  * @param {*} type type of the cached resource
32 |  * @param {*} date the date associated with this resource, or false for timeseries data
33 |  */
34 | export const getCachedFilePath = (url, type, date = false) => {
35 |   // This data probably has its own timeseries in it
36 |   // Use local cache, assumed to be recent
37 |   const cachePath = date === false ? TIMESERIES_CACHE_PATH : path.join(DEFAULT_CACHE_PATH, date);
38 |
39 |   const urlHash = hash(url);
40 |   const extension = type || path.extname(url).substr(1) || 'txt';
41 |
42 |   const filePath = path.join(cachePath, `${urlHash}.${extension}`);
43 |
44 |   return filePath;
45 | };
46 |
47 | /**
48 |   Get the cache for the given URL at a given date.
49 |
50 |   If the date requested is before today, and no cache is available, we will be unable to fetch this URL, hence
51 |   the function returns `RESOURCE_UNAVAILABLE`.
52 |
53 |   If we are able to fetch this URL (because it is a timeseries or we are requesting today's data), the function
54 |   returns `CACHE_MISS`.
55 |
56 |  * @param {*} url URL of the cached resource
57 |  * @param {*} type type of the cached resource
58 |  * @param {*} date the date associated with this resource, or false for timeseries data
59 |  */
60 | export const getCachedFile = async (url, type, date) => {
61 |   const filePath = getCachedFilePath(url, type, date);
62 |
63 |   if (await fs.exists(filePath)) {
64 |     console.log('  ⚡️ Cache hit for %s from %s', url, filePath);
65 |     return fs.readFile(filePath);
66 |   }
67 |   if (date && datetime.dateIsBefore(new Date(date), datetime.getDate())) {
68 |     console.log('  ⚠️ Cannot go back in time to get %s, no cache present', url);
69 |     return RESOURCE_UNAVAILABLE;
70 |   }
71 |   console.log('  🐢 Cache miss for %s at %s', url, filePath);
72 |   return CACHE_MISS;
73 | };
74 |
75 | /**
76 |  * Saves a file to cache, at the provided date
77 |  *
78 |  * @param {*} url URL of the cached resource
79 |  * @param {*} type type of the cached resource
80 |  * @param {*} date the date associated with this resource, or false for timeseries data
81 |  * @param {*} data file data to be saved
82 |  */
83 | export const saveFileToCache = async (url, type, date, data) => {
84 |   const filePath = getCachedFilePath(url, type, date);
85 |   return fs.writeFile(filePath, data, { silent: true });
86 | };

--------------------------------------------------------------------------------
/lib/fs.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs';
2 | import path from 'path';
3 | import csvParse from 'csv-parse';
4 | import csvStringify from 'csv-stringify';
5 |
6 | /**
7 |   Check if a file or directory exists
8 | */
9 | export const exists = async filePath => {
10 |   return fs.existsSync(filePath);
11 | };
12 |
13 | /**
14 |   When given a missing directory, it creates it
15 | */
16 | export const ensureDir = async dirPath => {
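  // no-op when dirPath is falsy or the directory already exists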
17 |   if (dirPath) {
18 |     if (!(await exists(dirPath))) {
19 |       await fs.promises.mkdir(dirPath, { recursive: true });
20 |     }
21 |   }
22 | };
23 |
24 | /**
25 |  * Returns the list of files at the given directory path
26 |  *
27 |  * @throws ENOENT if directory does not exist
28 |  */
29 | export const getFilesInDir = async dirPath => {
30 |   return fs.promises.readdir(dirPath);
31 | };
32 |
33 | /**
34 |   Read and return the file's content at a given path.
35 |
36 |   @throws ENOENT if file does not exist.
37 | */
38 | export const readFile = async filePath => {
39 |   return fs.promises.readFile(filePath, 'utf8');
40 | };
41 |
42 | /**
43 |   Read and return a JSON file at a given path.
44 |
45 |   @throws ENOENT if file does not exist.
46 | */
47 | export const readJSON = async filePath => {
48 |   return JSON.parse(await readFile(filePath));
49 | };
50 |
51 | /**
52 |   Read and return a CSV file at a given path.
53 |
54 |   @throws ENOENT if file does not exist.
55 | */
56 | export const readCSV = async filePath => {
57 |   return new Promise(async (resolve, reject) => {
58 |     try {
59 |       const data = await readFile(filePath);
60 |
61 |       csvParse(
62 |         data,
63 |         {
64 |           columns: true
65 |         },
66 |         function(err, output) {
67 |           if (err) {
68 |             reject(err);
69 |           } else {
70 |             resolve(output);
71 |           }
72 |         }
73 |       );
74 |     } catch (e) {
75 |       reject(e);
76 |     }
77 |   });
78 | };
79 |
80 | /**
81 |  * Write data to a file
82 |  *
83 |  * @param {*} filePath path to write
84 |  * @param {*} data data to write
85 |  * @param {*} options
86 |  *  - silent: the function will not write to console on write success
87 |  *  - ensureDir: creates a directory if it is missing
88 |  */
89 | export const writeFile = async (filePath, data, options = {}) => {
90 |   options = { silent: false, ensureDir: true, ...options };
91 |
92 |   if (options.ensureDir) {
93 |     await ensureDir(path.dirname(filePath));
94 |   }
95 |
96 |   const ret = await fs.promises.writeFile(filePath, data);
97 |
98 |   if (!options.silent) {
99 |     console.log(`✏️ ${filePath} written`);
100 |   }
101 |
102 |   return ret;
103 | };
104 |
105 | /**
106 |   Write JSON to a file
107 | */
108 | export const writeJSON = async (filePath, data, options = {}) => {
109 |   return writeFile(filePath, JSON.stringify(data, null, 2), options);
110 | };
111 |
112 | /**
113 |   Write CSV to a file
114 | */
115 | export const writeCSV = async (filePath, data, options = {}) => {
116 |   return new Promise((resolve, reject) => {
117 |     csvStringify(data, (err, output) => {
118 |       if (err) {
119 |         reject(err);
120 |       } else {
121 |         resolve(writeFile(filePath, output, options));
122 |       }
123 |     });
124 |   });
125 | };

--------------------------------------------------------------------------------
/lib/fetch.test.js:
--------------------------------------------------------------------------------
1 | import * as mockFs from './__test_utils__/fs.js';
2 |
3 | import * as fetch from './fetch.js';
4 | import * as caching from './caching.js';
5 |
6 | describe('Fetching', () => {
7 |   beforeAll(() => {
8 |     jest.setTimeout(30000);
9 |   });
10 |
11 |   afterAll(() => {
12 |     jest.setTimeout(5000);
13 |   });
14 |
15 |   describe('fetch', () => {
16 |     test('when fetching an example url, we get the correct data back', async () => {
17 |       const key = 'foo1';
18 |       const value = 'bar1';
19 |
20 |       mockFs.mock({});
21 |
22 |       const body = await fetch.fetch(`https://postman-echo.com/get?${key}=${value}`, 'json');
23 |
24 |       const jsonBody = JSON.parse(body);
25 |
26 |       expect(jsonBody.args[key]).toBe(value);
27 |
28 |       mockFs.restore();
29 |     });
30 |
31 |     test('when fetching a URL in cache, we get the cached version', async () => {
32 |       const wrongKey = 'foo1';
33 |       const wrongValue = 'bar1';
34 |
35 |       const key = 'foo2';
36 |       const value = 'bar2';
37 |
38 |       const url = `https://postman-echo.com/get?${wrongKey}=${wrongValue}`;
39 |       const type = 'json';
40 |       const date = false;
41 |
42 |       // Simulate a cached version of this file
43 |       mockFs.mock({
44 |         [caching.getCachedFilePath(url, type, date)]: `{"args": {"${key}": "${value}"}}`
45 |       });
46 |
47 |       const body = await fetch.fetch(url, type, date);
48 |
49 |       const jsonBody = JSON.parse(body);
50 |
51 |       expect(jsonBody.args[key]).toBe(value);
52 |
53 |       mockFs.restore();
54 |     });
55 |
56 |     test('when fetching a URL in cache with alwaysRun=true, we fetch a new version', async () => {
57 |       const wrongKey = 'foo1';
58 |       const wrongValue = 'bar1';
59 |
60 |       const key = 'foo2';
61 |       const value = 'bar2';
62 |
63 |       const url = `https://postman-echo.com/get?${key}=${value}`;
64 |       const type = 'json';
65 |       const date = false;
66 |
67 |       // Simulate a cached version of this file
68 |       mockFs.mock({
69 |         [caching.getCachedFilePath(url, type, date)]: `{"args": {"${wrongKey}": "${wrongValue}"}}`
70 |       });
71 |
72 |       const body = await fetch.fetch(url, type, date, { alwaysRun: true });
73 |
74 |       const jsonBody = JSON.parse(body);
75 |
76 |       expect(jsonBody.args[key]).toBe(value);
77 |
78 |       mockFs.restore();
79 |     });
80 |
81 |     test('when fetching a URL with disableSSL=true, fetching should work', async () => {
82 |       const key = 'foo2';
83 |       const value = 'bar2';
84 |
85 |       const url = `https://postman-echo.com/get?${key}=${value}`;
86 |       const type = 'json';
87 |       const date = false;
88 |
89 |       // Use an empty mock file system
90 |       mockFs.mock({});
91 |
92 |       const body = await fetch.fetch(url, type, date, { alwaysRun: true, disableSSL: true });
93 |
94 |       const jsonBody = JSON.parse(body);
95 |
96 |       expect(jsonBody.args[key]).toBe(value);
97 |
98 |       mockFs.restore();
99 |     });
100 |   });
101 |
102 |   describe('headless', () => {
103 |     test('when fetching an example url, we get the correct data back', async () => {
104 |       const key = 'foo1';
105 |       const value = 'bar1';
106 |
107 |       mockFs.restore();
108 |       const body = (await fetch.headless(`https://postman-echo.com/get?${key}=${value}`, 'json', { alwaysRun: true })).text();
109 |
110 |       const jsonBody = JSON.parse(body);
111 |
112 |       expect(jsonBody.args[key]).toBe(value);
113 |     });
114 |   });
115 | });

--------------------------------------------------------------------------------
/lib/datetime.js:
--------------------------------------------------------------------------------
1 | /*
2 |   Get a date object offset to UTC-7 (US Pacific Daylight Time)
3 | */
4 | function getDate() {
5 |   const date = new Date();
6 |   const utcDate = new Date(date.toUTCString());
7 |   utcDate.setHours(utcDate.getHours() - 7);
8 |   return new Date(utcDate);
9 | }
10 |
11 | /*
12 |   Get date formatted in YYYY-M-D
13 | */
14 | function getYYYYMD(date = getDate(), sep = '-') {
15 |   const month = date.getUTCMonth() + 1;
16 |   const day = date.getUTCDate();
17 |   const year = date.getUTCFullYear();
18 |
19 |   return `${year}${sep}${month}${sep}${day}`;
20 | }
21 |
22 | /*
23 |   Get date formatted in YYYY-MM-DD
24 | */
25 | function getYYYYMMDD(date = getDate(), sep = '-') {
26 |   const month = (date.getUTCMonth() + 1).toString().padStart(2, '0');
27 |   const day = date
28 |     .getUTCDate()
29 |     .toString()
30 |     .padStart(2, '0');
31 |   const year = date.getUTCFullYear();
32 |
33 |   return `${year}${sep}${month}${sep}${day}`;
34 | }
35 |
36 | /*
37 |   Get date formatted in DD-MM-YYYY
38 | */
39 | function getDDMMYYYY(date = getDate(), sep = '-') {
40 |   const month = (date.getUTCMonth() + 1).toString().padStart(2, '0');
41 |   const day = date
42 |     .getUTCDate()
43 |     .toString()
44 |     .padStart(2, '0');
45 |   const year = date.getUTCFullYear();
46 |
47 |   return `${day}${sep}${month}${sep}${year}`;
48 | }
49 |
50 | /*
51 |   Get date formatted in M/D/YYYY
52 | */
53 | function getMDYYYY(date = getDate(), sep = '/') {
54 |   const month = date.getUTCMonth() + 1;
55 |   const day = date.getUTCDate();
56 |   const year = date.getUTCFullYear();
57 |
58 |   return `${month}${sep}${day}${sep}${year}`;
59 | }
60 |
61 | /*
62 |   Get date formatted in M/D/YY
63 | */
64 | function getMDYY(date = getDate(), sep = '/') {
65 |   const month = date.getUTCMonth() + 1;
66 |   const day = date.getUTCDate();
67 |   const year = date
68 |     .getUTCFullYear()
69 |     .toString()
70 |     .substr(2, 2);
71 |
72 |   return `${month}${sep}${day}${sep}${year}`;
73 | }
74 |
75 | /*
76 |   Check if the *date* of the passed date is before the other passed date
77 |   *sigh*
78 | */
79 | function dateIsBefore(a, b) {
80 |   a = new Date(a);
81 |   b = new Date(b);
82 |   a.setHours(0, 0, 0, 0);
83 |   b.setHours(0, 0, 0, 0);
84 |   return a < b;
85 | }
86 |
87 | /*
88 |   Check if the *date* of the passed date is before or equal to the other passed date
89 |   *sigh*
90 | */
91 | function dateIsBeforeOrEqualTo(a, b) {
92 |   a = new Date(a);
93 |   b = new Date(b);
94 |   a.setHours(0, 0, 0, 0);
95 |   b.setHours(0, 0, 0, 0);
96 |   return a < b || a.getTime() === b.getTime();
97 | }
98 |
99 | /*
100 |   Check if the date we're scraping is before the passed date
101 | */
102 | function scrapeDateIsBefore(date) {
103 |   let scrapeDate = getDate();
104 |   if (process.env.SCRAPE_DATE) {
105 |     scrapeDate = new Date(process.env.SCRAPE_DATE);
106 |   }
107 |   return dateIsBefore(scrapeDate, new Date(date));
108 | }
109 |
110 | /*
111 |   Check if the date we're scraping is after the passed date
112 | */
113 | function scrapeDateIsAfter(date) {
114 |   let scrapeDate = getDate();
115 |   if (process.env.SCRAPE_DATE) {
116 |     scrapeDate = new Date(process.env.SCRAPE_DATE);
117 |   }
118 |   return dateIsBefore(new Date(date), scrapeDate);
119 | }
120 |
121 | /*
122 |   Check if the date we're scraping is equal to the passed date
123 | */
124 | function scrapeDateIs(date) {
125 |   let scrapeDate = getDate();
126 |   if (process.env.SCRAPE_DATE) {
127 |     scrapeDate = new Date(process.env.SCRAPE_DATE);
128 |   }
129 |
130 |   const compareDate = new Date(date);
131 |   scrapeDate.setHours(0, 0, 0, 0);
132 |   compareDate.setHours(0, 0, 0, 0);
133 |
134 |   return compareDate.getTime() === scrapeDate.getTime();
135 | }
136 |
137 | export { getDate, getYYYYMD, getYYYYMMDD, getMDYYYY, getMDYY, getDDMMYYYY, dateIsBefore, scrapeDateIsBefore, scrapeDateIsAfter, scrapeDateIs, dateIsBeforeOrEqualTo };

--------------------------------------------------------------------------------
/lib/caching.test.js:
--------------------------------------------------------------------------------
1 | import each from 'jest-each';
2 | import * as mockFs from './__test_utils__/fs.js';
3 |
4 | import * as caching from './caching.js';
5 | import * as datetime from './datetime.js';
6 | import * as fs from './fs.js';
7 |
8 | describe('caching', () => {
9 |   describe('getCachedFilePath', () => {
10 |     each([
11 |       ['2020-03-01', /coronadatascraper-cache\/2020-03-01\/.*/],
12 |       ['2020-02-03', /coronadatascraper-cache\/2020-02-03\/.*/],
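      // date === false selects the timeseries cache directory (see getCachedFilePath)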
13 |       [false, /cache\/.*/]
14 |     ]).test('when given date "%s", it returns the directory "%s"', (date, expected) => {
15 |       expect(caching.getCachedFilePath('http://example.com', 'json', date)).toMatch(expected);
16 |     });
17 |
18 |     each([
19 |       ['json', /.json$/g],
20 |       ['csv', /.csv$/g],
21 |       ['txt', /.txt$/g],
22 |       ['html', /.html$/g]
23 |     ]).test('when given extension "%s", it is present in the cached file path', (ext, expected) => {
24 |       expect(caching.getCachedFilePath('http://example.com', ext, '2020-03-16')).toMatch(expected);
25 |     });
26 |
27 |     each([
28 |       ['https://example.com/somefile', 'html', false],
29 |       ['https://example.com/testing', 'csv', '2020-03-06']
30 |     ]).test('when given same parameters, we get the same output', (host, ext, date) => {
31 |       expect(caching.getCachedFilePath(host, ext, date)).toBe(caching.getCachedFilePath(host, ext, date));
32 |     });
33 |   });
34 |
35 |   describe('getCachedFile', () => {
36 |     each([
37 |       ['https://example.com/somefile', 'html', false],
38 |       ['https://example.com/testing', 'csv', '2020-03-06']
39 |     ]).test('when given a file that has been cached, it returns the file', async (host, ext, date) => {
40 |       const fileContent = 'a test file';
41 |
42 |       // Create a mock file system with the expected cached file
43 |       mockFs.mock({
44 |         [caching.getCachedFilePath(host, ext, date)]: fileContent
45 |       });
46 |
47 |       const outputFile = await caching.getCachedFile(host, ext, date);
48 |
49 |       expect(outputFile).toBe(fileContent);
50 |
51 |       // Don't forget to restore!
52 |       mockFs.restore();
53 |     });
54 |
55 |     each([
56 |       ['https://example.com/somefile', 'html', false],
57 |       // For this example, we request a file for today. A cache miss for a file before today has a different behavior
58 |       ['https://example.com/testing', 'csv', datetime.getYYYYMD()]
59 |     ]).test('when given a file that has not been cached for today, it returns CACHE_MISS', async (host, ext, date) => {
60 |       const outputFile = await caching.getCachedFile(host, ext, date);
61 |       expect(outputFile).toBe(caching.CACHE_MISS);
62 |     });
63 |
64 |     each([
65 |       // For this example, we request a file for a day before today, meaning the resource cannot be fetched if not cached
66 |       ['https://example.com/somefile', 'html', '2019-5-16'],
67 |       ['https://example.com/testing', 'csv', '2020-1-2']
68 |     ]).test('when given a file that has not been cached with a date before today, it returns RESOURCE_UNAVAILABLE', async (host, ext, date) => {
69 |       const outputFile = await caching.getCachedFile(host, ext, date);
70 |       expect(outputFile).toBe(caching.RESOURCE_UNAVAILABLE);
71 |     });
72 |   });
73 |
74 |   describe('saveFileToCache', () => {
75 |     each([
76 |       ['https://example.com/somefile', 'html', false],
77 |       // For this example, we request a file for today. A cache miss for a file before today has a different behavior
78 |       ['https://example.com/testing', 'csv', '2020-1-2']
79 |     ]).test('when given a file, it saves it to cache', async (host, ext, date) => {
80 |       const fileContent = 'a test file';
81 |
82 |       // Create an empty mock file system
83 |       mockFs.mock({});
84 |
85 |       await caching.saveFileToCache(host, ext, date, fileContent);
86 |
87 |       const expectedFileContent = await fs.readFile(caching.getCachedFilePath(host, ext, date));
88 |
89 |       expect(expectedFileContent).toBe(fileContent);
90 |
91 |       // Don't forget to restore!
92 | mockFs.restore(); 93 | }); 94 | }); 95 | }); 96 | -------------------------------------------------------------------------------- /site/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 55 | -------------------------------------------------------------------------------- /tasks/findPopulations.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import * as fs from '../lib/fs.js'; 3 | import * as transform from '../lib/transform.js'; 4 | 5 | const dataPath = path.join('./coronavirus-data-sources/population/'); 6 | 7 | /* 8 | Read population data from a CSV with correct datatypes 9 | */ 10 | async function readPopulationFromCSV(csvPath) { 11 | const output = await fs.readCSV(path.resolve(dataPath, csvPath)); 12 | const populationData = {}; 13 | for (const item of output) { 14 | if (item.population) { 15 | populationData[item.name] = parseInt(item.population, 10); 16 | } else { 17 | throw new Error(`Invalid data in ${csvPath} for ${item.name}`); 18 | } 19 | } 20 | 21 | return populationData; 22 | } 23 | 24 | async function readPopulationData(featureCollection) { 25 | const populations = { 26 | byCity: {}, 27 | byCounty: { 28 | USA: await readPopulationFromCSV('population-usa-counties.csv'), 29 | GBR: await readPopulationFromCSV('population-gbr-counties.csv') 30 | }, 31 | byState: { 32 | China: await readPopulationFromCSV('population-china-admin-divisions.csv'), 33 | Australia: await readPopulationFromCSV('population-australia-states.csv'), 34 | Canada: await readPopulationFromCSV('population-canada-provinces.csv'), 35 | Italy: await readPopulationFromCSV('population-italy-regions.csv'), 36 | USA: await readPopulationFromCSV('population-usa-states-abbrev.csv'), 37 | Brazil: await readPopulationFromCSV('population-brazil-states-abbrev.csv') 38 | }, 39 | byCountry: {}, 40 | supplemental: await readPopulationFromCSV('population-supplemental.csv') 41 | }; 42 | 43 | populations.byState.CHN = populations.byState.China; 44 | populations.byState.CAN = populations.byState.Canada; 45 | populations.byState.ITA = populations.byState.Italy; 46 | populations.byState.AUS = populations.byState.Australia; 47 | populations.byState.BRA = populations.byState.Brazil; 48 | 49 | // Store data from features 50 | for (const feature of featureCollection.features) { 51 | if (feature.properties.pop_est) { 52 | populations.byCountry[feature.properties.name] = feature.properties.pop_est; 53 | if (feature.properties.name_en) { 54 | populations.byCountry[feature.properties.name_en] = feature.properties.pop_est; 55 | } 56 | if (feature.properties.abbrev) { 57 | populations.byCountry[feature.properties.abbrev.replace(/\./g, '')] = feature.properties.pop_est; 58 | } 59 | } 60 | } 61 | 62 | return populations; 63 | } 64 | 65 | const generatePopulations = async ({ locations, featureCollection, report, options, sourceRatings }) => { 66 | console.log('⏳ Getting population data...'); 67 | 68 | const populations = await readPopulationData(featureCollection); 69 | 70 | function getPopulation(location) { 71 | let population = null; 72 | 73 | if (location.city) { 74 | // Use either city by country or city by state 75 | let populationSource = populations.byCity[location.country]; 76 | if (populationSource && populationSource[location.state]) { 77 | populationSource = populationSource[location.state]; 78 | } 79 | if (populationSource && populationSource[location.city]) { 80 | population =
populationSource[location.city]; 81 | } 82 | } else if (location.county) { 83 | if (populations.byCounty[location.country]) { 84 | // Try counties 85 | const populationSource = populations.byCounty[location.country]; 86 | const countyNameReplaced = location.county.replace('Parish', 'County'); 87 | const countyNameJoined = `${location.county}, ${location.state}`; 88 | const countyNameReplacedJoined = `${countyNameReplaced}, ${location.state}`; 89 | 90 | population = populationSource[location.county] || populationSource[countyNameReplaced] || populationSource[countyNameJoined] || populationSource[countyNameReplacedJoined]; 91 | } 92 | } else if (location.state) { 93 | if (populations.byState[location.country] && populations.byState[location.country][location.state]) { 94 | // Try states 95 | population = populations.byState[location.country][location.state]; 96 | } 97 | } else { 98 | // Try countries 99 | population = populations.byCountry[location.country]; 100 | } 101 | 102 | if (!population) { 103 | population = populations.supplemental[location.city]; 104 | } 105 | 106 | if (!population) { 107 | population = populations.supplemental[location.county]; 108 | } 109 | 110 | if (!population) { 111 | population = populations.supplemental[location.state]; 112 | } 113 | 114 | if (!population) { 115 | population = populations.supplemental[location.country]; 116 | } 117 | 118 | if (!population) { 119 | if (location.featureId) { 120 | const feature = featureCollection.features[location.featureId]; 121 | if (feature.properties.pop_est) { 122 | population = feature.properties.pop_est; 123 | } 124 | } 125 | } 126 | 127 | return population; 128 | } 129 | 130 | const errors = []; 131 | 132 | let populationFound = 0; 133 | for (const location of locations) { 134 | if (location.population) { 135 | continue; 136 | } 137 | 138 | const population = getPopulation(location); 139 | 140 | if (population) { 141 | location.population = population; 142 | populationFound++; 143 | } else { 144 | console.error(' ❌ %s: ?', transform.getName(location)); 145 | errors.push(transform.getName(location)); 146 | } 147 | } 148 | console.log('✅ Found population data for %d out of %d locations', populationFound, Object.keys(locations).length); 149 | 150 | report.findPopulation = { 151 | numLocationsWithPopulation: populationFound, 152 | missingPopulations: errors 153 | }; 154 | 155 | return { locations, featureCollection, report, options, sourceRatings }; 156 | }; 157 | 158 | export default generatePopulations; 159 | -------------------------------------------------------------------------------- /lib/transform.js: -------------------------------------------------------------------------------- 1 | import crypto from 'crypto'; 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | import * as datetime from './datetime.js'; 5 | 6 | const usStates = JSON.parse(fs.readFileSync(path.join('coronavirus-data-sources', 'lib', 'us-states.json'), 'utf8')); 7 | 8 | const countryCodes = JSON.parse(fs.readFileSync(path.join('coronavirus-data-sources', 'ISO-3166-Countries-with-Regional-Codes', 'slim-3', 'slim-3.json'), 'utf8')); 9 | 10 | /* 11 | Override some incorrect country names 12 | */ 13 | const countryMap = { 14 | Kosovo: 'XKX', 15 | 'Congo (Kinshasa)': 'Congo, Democratic Republic of the', 16 | "Cote d'Ivoire": "Côte d'Ivoire", 17 | Russia: 'Russian Federation', 18 | Vietnam: 'Viet Nam', 19 | 'Korea, South': 'Korea, Republic of', 20 | 'South Korea': 'Korea, Republic of', 21 | 'North Korea': "Korea (Democratic People's Republic of)", 
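// e.g. toISO3166Alpha3('Korea, South') first maps through this table to 'Korea, Republic of',
// and then resolves to 'KOR' via the ISO country list (illustrative).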
22 | Brunei: 'Brunei Darussalam', 23 | Reunion: 'Réunion', 24 | Curacao: 'Curaçao', 25 | 'United Kingdom': 'GBR', 26 | 'occupied Palestinian territory': 'PSE', 27 | 'Congo (Brazzaville)': 'COG' 28 | }; 29 | 30 | /* 31 | Convert an object keyed on county name to an array 32 | */ 33 | function objectToArray(object) { 34 | const array = []; 35 | for (const [county, data] of Object.entries(object)) { 36 | array.push({ 37 | county, 38 | ...data 39 | }); 40 | } 41 | return array; 42 | } 43 | 44 | /* 45 | Append ' County' to the end of a string, if not already present 46 | */ 47 | function addCounty(string) { 48 | if (!string.match(/ County$/)) { 49 | string += ' County'; 50 | } 51 | return string; 52 | } 53 | 54 | /* 55 | Get the full name of a location 56 | */ 57 | function getName(location) { 58 | let name = ''; 59 | let sep = ''; 60 | if (location.city) { 61 | name += location.city; 62 | sep = ', '; 63 | } 64 | if (location.county) { 65 | name += sep + location.county; 66 | sep = ', '; 67 | } 68 | if (location.state) { 69 | name += sep + location.state; 70 | sep = ', '; 71 | } 72 | if (location.country) { 73 | name += sep + location.country; 74 | sep = ', '; 75 | } 76 | return name; 77 | } 78 | 79 | /* 80 | MD5 hash a given string 81 | */ 82 | function hash(string) { 83 | return crypto 84 | .createHash('md5') 85 | .update(string) 86 | .digest('hex'); 87 | } 88 | 89 | /* 90 | Normalize the state as a 2-letter string 91 | */ 92 | function toUSStateAbbreviation(string) { 93 | return usStates[string] || string; 94 | } 95 | 96 | /* 97 | Normalize the country as an ISO 3166-1 alpha-3 code 98 | */ 99 | function toISO3166Alpha3(string) { 100 | string = countryMap[string] || string; 101 | for (const country of countryCodes) { 102 | if (country['alpha-3'] === string || country['alpha-2'] === string || country.name === string || country.name.replace(/\s*\(.*?\)/, '') === string || country.name.replace(/, Province of .*$/, '') === string || country.name.replace(/, Republic of$/, '') === string) { 103 | return country['alpha-3']; 104 | } 105 | } 106 | console.warn('⚠️ Could not find country code for', string); 107 | return string; 108 | } 109 | 110 | /* 111 | Calculates active cases from location data 112 | */ 113 | function getActiveFromLocation(location) { 114 | const cases = location.cases !== undefined ? location.cases : 0; 115 | const deaths = location.deaths !== undefined ? location.deaths : 0; 116 | const recovered = location.recovered !== undefined ?
location.recovered : 0; 117 | return cases - deaths - recovered; 118 | } 119 | 120 | /* 121 | Transpose a location-keyed timeseries into a date-keyed structure 122 | */ 123 | function transposeTimeseries(timeseriesByLocation) { 124 | function getProps(location) { 125 | const newLocation = { ...location }; 126 | delete newLocation.dates; 127 | return newLocation; 128 | } 129 | 130 | // Find all dates and locations 131 | const locations = []; 132 | let allDates = []; 133 | for (const [locationName, location] of Object.entries(timeseriesByLocation)) { 134 | for (const [date] of Object.entries(location.dates)) { 135 | if (allDates.indexOf(date) === -1) { 136 | allDates.push(date); 137 | } 138 | } 139 | const newLocation = getProps(location); 140 | newLocation.name = locationName; 141 | locations.push(newLocation); 142 | } 143 | 144 | // Sort dates 145 | allDates = allDates.sort((a, b) => { 146 | if (a === b) { 147 | return 0; 148 | } 149 | 150 | if (datetime.dateIsBefore(a, b)) { 151 | return -1; 152 | } 153 | return 1; 154 | }); 155 | 156 | const timeseriesByDate = {}; 157 | // Iterate over all dates, add data 158 | for (const date of allDates) { 159 | timeseriesByDate[date] = {}; 160 | 161 | let index = 0; 162 | for (const location of locations) { 163 | const locationData = timeseriesByLocation[location.name]; 164 | if (locationData.dates[date]) { 165 | timeseriesByDate[date][index] = locationData.dates[date]; 166 | } 167 | index++; 168 | } 169 | } 170 | 171 | return { timeseriesByDate, locations }; 172 | } 173 | 174 | const caseFields = ['cases', 'recovered', 'active', 'deaths', 'tested']; 175 | 176 | /* 177 | Sum the passed array of data into a single object with the properties of the optional second argument 178 | */ 179 | function sumData(dataArray, object) { 180 | const summedData = { ...object }; 181 | for (const data of dataArray) { 182 | for (const field of caseFields) { 183 | if (data[field]) { 184 | summedData[field] = summedData[field] || 0; 185 | summedData[field] += data[field]; 186 | } 187 | } 188 | } 189 | return summedData; 190 | } 191 | 192 | /* 193 | Get the priority of a location 194 | */ 195 | function getPriority(location) { 196 | return location.priority !== undefined ?
location.priority : 0; 197 | } 198 | 199 | /* 200 | Get the growth factor for two numbers, null if infinite 201 | */ 202 | function getGrowthfactor(casesToday, casesYesterday) { 203 | const growthFactor = casesToday / casesYesterday; 204 | if (growthFactor === Infinity) { 205 | return null; 206 | } 207 | return growthFactor; 208 | } 209 | 210 | export { objectToArray, addCounty, getName, hash, toUSStateAbbreviation, getActiveFromLocation, transposeTimeseries, toISO3166Alpha3, sumData, usStates, getPriority, getGrowthfactor }; 211 | -------------------------------------------------------------------------------- /site/style.css: -------------------------------------------------------------------------------- 1 | @import url("https://use.typekit.net/xcj1vcp.css"); 2 | 3 | :root { 4 | --cds-editor-header-background-color: var(--spectrum-global-color-gray-500); 5 | } 6 | 7 | .spectrum--light { 8 | /* Reassign spectrum color variables with our own generated colors */ 9 | --spectrum-global-color-gray-50: var(--Gray33); 10 | --spectrum-global-color-gray-75: var(--Gray67); 11 | --spectrum-global-color-gray-100: var(--Gray100); 12 | --spectrum-global-color-gray-200: var(--Gray200); 13 | --spectrum-global-color-gray-300: var(--Gray300); 14 | --spectrum-global-color-gray-400: var(--Gray400); 15 | --spectrum-global-color-gray-500: var(--Gray500); 16 | --spectrum-global-color-gray-600: var(--Gray600); 17 | --spectrum-global-color-gray-700: var(--Gray700); 18 | --spectrum-global-color-gray-800: var(--Gray800); 19 | --spectrum-global-color-gray-900: var(--Gray900); 20 | 21 | --spectrum-global-color-blue-400: var(--Blue100); 22 | --spectrum-global-color-blue-500: var(--Blue200); 23 | --spectrum-global-color-blue-600: var(--Blue300); 24 | --spectrum-global-color-blue-700: var(--Blue400); 25 | 26 | --spectrum-semantic-cta-color-background-default: var(--spectrum-global-color-blue-600); 27 | --spectrum-semantic-cta-color-background-hover: var(--spectrum-global-color-blue-700); 28 | --spectrum-semantic-cta-color-background-down: var(--spectrum-global-color-blue-700); 29 | --spectrum-semantic-cta-color-background-key-focus: var(--spectrum-global-color-blue-600); 30 | } 31 | 32 | * { 33 | box-sizing: border-box; 34 | } 35 | 36 | html { 37 | font-size: 16px; 38 | height: 100%; 39 | } 40 | 41 | body { 42 | height: 100%; 43 | margin: 0; 44 | } 45 | 46 | [hidden] { 47 | display: none !important; 48 | } 49 | 50 | .cds-Editor { 51 | height: 100%; 52 | overflow: hidden; 53 | display: flex; 54 | flex-direction: column; 55 | } 56 | .cds-Editor-header { 57 | position: relative; 58 | display: flex; 59 | flex-direction: row; 60 | 61 | background-color: var(--cds-editor-header-background-color); 62 | } 63 | .cds-Editor-content { 64 | flex: 1; 65 | overflow-x: auto; 66 | } 67 | .cds-Editor-header { 68 | background: var(--spectrum-global-color-gray-75); 69 | } 70 | .cds-Editor-header:after { 71 | content: ''; 72 | position: absolute; 73 | bottom: 0; 74 | display: block; 75 | width: 100%; 76 | height: 1px; 77 | background: var(--spectrum-global-color-gray-200); 78 | } 79 | 80 | .cds-Editor-JSON { 81 | padding: 8px 24px; 82 | } 83 | 84 | .cds-Editor-title, 85 | .cds-Editor-actions { 86 | padding: 8px 12px; 87 | } 88 | 89 | .cds-Editor-title { 90 | flex: 1; 91 | padding-left: 16px; 92 | } 93 | 94 | .cds-Logo { 95 | height: 56px; 96 | } 97 | 98 | .cds-Logo-Type { 99 | fill: #21385E; 100 | } 101 | 102 | .spectrum-Site-title { 103 | font-size: 48px; 104 | margin-left: 12px; 105 | } 106 | 107 | .cds-Ribbon { 108 | position: fixed;
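/* Pin the ribbon to the top-right corner of the viewport */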
109 | top: 0; 110 | right: 0; 111 | } 112 | 113 | .cds-Footer { 114 | margin-top: 120px; 115 | } 116 | 117 | .cds-Sidebar { 118 | z-index: 1000; 119 | min-width: 235px; 120 | border-right: 1px solid var(--spectrum-global-color-gray-200); 121 | } 122 | 123 | .spectrum-ActionButton .icon { 124 | font-size: 24px; 125 | } 126 | 127 | .spectrum-Button--cta { 128 | 129 | } 130 | 131 | .spectrum-Button .icon { 132 | font-size: 18px; 133 | margin-right: 8px; 134 | } 135 | 136 | .cds-Editor-menuToggle { 137 | display: none; 138 | } 139 | 140 | .spectrum-Site-overlay { 141 | z-index: 999; 142 | } 143 | 144 | .cds-Error { 145 | display: inline-block; 146 | margin: 24px; 147 | padding: 8px 16px; 148 | background: var(--spectrum-semantic-notice-color-background); 149 | border-radius: var(--spectrum-alias-border-radius-medium); 150 | color: white; 151 | } 152 | 153 | .spectrum-Heading { 154 | font-family: aglet-slab, sans-serif; 155 | font-weight: 500; 156 | color: var(--spectrum-global-color-blue-500); 157 | } 158 | .spectrum-SideNav-itemLink { 159 | font-family: aglet-slab, sans-serif; 160 | font-weight: 500; 161 | } 162 | .spectrum-Body { 163 | font-family: aglet-sans, sans-serif; 164 | } 165 | 166 | .spectrum-Site-hero .spectrum-Heading--L { 167 | margin-top: calc( 2.5 * var(--spectrum-heading-l-margin-top, var(--spectrum-alias-heading-l-margin-top))); 168 | } 169 | 170 | .spectrum-Link { 171 | text-decoration: underline; 172 | } 173 | 174 | .spectrum-Link--silent { 175 | text-decoration: none; 176 | } 177 | 178 | div.spectrum-SideNav-itemLink { 179 | cursor: default; 180 | } 181 | 182 | div.spectrum-SideNav-itemLink:hover { 183 | background: transparent; 184 | } 185 | 186 | .cds-Sources-list { 187 | padding: 0; 188 | margin: 48px 24px 0 24px; 189 | } 190 | 191 | /* Ratings */ 192 | .cds-ReportCard { 193 | display: flex; 194 | flex-direction: row; 195 | margin: 16px 0 32px 0; 196 | } 197 | 198 | .cds-ReportCard h2.spectrum-Heading--L { 199 | margin-top: 0; 200 | } 201 | 202 | .cds-ReportCard-grade { 203 | flex-shrink: 0; 204 | display: flex; 205 | align-items: center; 206 | justify-content: center; 207 | 208 | position: relative; 209 | 210 | width: 78px; 211 | height: 78px; 212 | 213 | margin-right: 16px; 214 | 215 | border: 4px solid transparent; 216 | border-radius: 100%; 217 | 218 | font-family: aglet-slab, sans-serif; 219 | font-size: 48px; 220 | 221 | --b-color: rgb(131, 154, 20); 222 | } 223 | .cds-ReportCard-grade--A { 224 | color: green; 225 | border-color: green; 226 | } 227 | .cds-ReportCard-grade--B { 228 | color: var(--b-color); 229 | border-color: var(--b-color); 230 | } 231 | .cds-ReportCard-grade--C { 232 | color: orange; 233 | border-color: orange; 234 | } 235 | .cds-ReportCard-grade--D, 236 | .cds-ReportCard-grade--F { 237 | color: red; 238 | border-color: red; 239 | } 240 | .cds-ReportCard-criteria { 241 | margin-top: 12px; 242 | } 243 | 244 | .cds-ReportCard-plusMinus { 245 | position: absolute; 246 | right: 4px; 247 | top: 6px; 248 | font-size: 36px; 249 | } 250 | 251 | /* Map */ 252 | #map { 253 | height: 100%; 254 | width: 100%; 255 | } 256 | 257 | .cds-Popup { 258 | margin: -10px 0; 259 | } 260 | 261 | .cds-Popup-table { 262 | margin: auto; 263 | border-collapse: collapse; 264 | } 265 | 266 | .cds-Popup-table th { 267 | text-align: right; 268 | padding-right: 8px; 269 | } 270 | 271 | @media screen and (max-width: 960px) { 272 | .is-editing .cds-SiteHeader { 273 | display: none; 274 | } 275 | .cds-Editor-menuToggle { 276 | display: block; 277 | } 278 | .cds-Sources-list { 279 | 
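/* Tighten the desktop side margins (24px) down to 12px on small screens */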
margin: 48px 12px 0 12px; 280 | } 281 | 282 | .spectrum-Site-content { 283 | /* fix: header scrolls off */ 284 | max-height: calc(100% - 48px); 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /lib/fetch.js: -------------------------------------------------------------------------------- 1 | import cheerio from 'cheerio'; 2 | import needle from 'needle'; 3 | import csvParse from 'csv-parse'; 4 | import puppeteer from 'puppeteer'; 5 | 6 | import * as datetime from './datetime.js'; 7 | import * as caching from './caching.js'; 8 | 9 | const CHROME_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'; 10 | const DEFAULT_VIEWPORT = { width: 1280, height: 800, isMobile: false }; 11 | 12 | const OPEN_TIMEOUT = 5000; 13 | const RESPONSE_TIMEOUT = 5000; 14 | const READ_TIMEOUT = 30000; 15 | 16 | // Spoof Chrome, just in case 17 | needle.defaults({ 18 | parse_response: false, 19 | user_agent: CHROME_AGENT, 20 | open_timeout: OPEN_TIMEOUT, // Maximum time to wait to establish a connection 21 | response_timeout: RESPONSE_TIMEOUT, // Maximum time to wait for a response 22 | read_timeout: READ_TIMEOUT // Maximum time to wait for data to transfer 23 | }); 24 | 25 | /** 26 | * Fetch whatever is at the provided URL. Use cached version if available. 27 | * @param {*} url URL of the resource 28 | * @param {*} type type of the resource 29 | * @param {*} date the date associated with this resource, or false for timeseries data 30 | * @param {*} options customizable options: 31 | * - alwaysRun: fetches from URL even if resource is in cache, defaults to false 32 | * - disableSSL: disables SSL verification for this resource, should be avoided 33 | */ 34 | export const fetch = async (url, type, date = process.env.SCRAPE_DATE || datetime.getYYYYMD(), options = {}) => { 35 | const { alwaysRun, disableSSL } = { alwaysRun: false, disableSSL: false, ...options }; 36 | 37 | const cachedBody = await caching.getCachedFile(url, type, date); 38 | 39 | if (cachedBody === caching.CACHE_MISS || alwaysRun) { 40 | console.log(' 🚦 Loading data for %s from server', url); 41 | 42 | if (disableSSL) { 43 | console.log(' ⚠️ SSL disabled for this resource'); 44 | process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; 45 | } 46 | 47 | const response = await needle('get', url); 48 | const fetchedBody = response.body.toString(); 49 | 50 | await caching.saveFileToCache(url, type, date, fetchedBody); 51 | 52 | if (disableSSL) { 53 | delete process.env.NODE_TLS_REJECT_UNAUTHORIZED; 54 | } 55 | 56 | return fetchedBody; 57 | } 58 | return cachedBody; 59 | }; 60 | 61 | /** 62 | * Load the webpage at the given URL and return a Cheerio object 63 | * @param {*} url URL of the resource 64 | * @param {*} date the date associated with this resource, or false for timeseries data 65 | * @param {*} options customizable options: 66 | * - alwaysRun: fetches from URL even if resource is in cache, defaults to false 67 | * - disableSSL: disables SSL verification for this resource, should be avoided 68 | */ 69 | export const page = async (url, date, options = {}) => { 70 | const body = await fetch(url, 'html', date, options); 71 | 72 | if (!body) { 73 | return null; 74 | } 75 | return cheerio.load(body); 76 | }; 77 | 78 | /** 79 | * Load and parse JSON from the given URL 80 | * @param {*} url URL of the resource 81 | * @param {*} date the date associated with this resource, or false for timeseries data 82 | * @param {*} options
customizable options: 83 | * - alwaysRun: fetches from URL even if resource is in cache, defaults to false 84 | * - disableSSL: disables SSL verification for this resource, should be avoided 85 | */ 86 | export const json = async (url, date, options = {}) => { 87 | const body = await fetch(url, 'json', date, options); 88 | 89 | if (!body) { 90 | return null; 91 | } 92 | return JSON.parse(body); 93 | }; 94 | 95 | /** 96 | * Load and parse CSV from the given URL 97 | * @param {*} url URL of the resource 98 | * @param {*} date the date associated with this resource, or false for timeseries data 99 | * @param {*} options customizable options: 100 | * - alwaysRun: fetches from URL even if resource is in cache, defaults to false 101 | * - disableSSL: disables SSL verification for this resource, should be avoided 102 | */ 103 | export const csv = async (url, date, options = {}) => { 104 | return new Promise(async (resolve, reject) => { 105 | const body = await fetch(url, 'csv', date, options); 106 | 107 | if (!body) { 108 | resolve(null); 109 | } else { 110 | csvParse( 111 | body, 112 | { 113 | columns: true 114 | }, 115 | function(err, output) { 116 | if (err) { 117 | reject(err); 118 | } else { 119 | resolve(output); 120 | } 121 | } 122 | ); 123 | } 124 | }); 125 | }; 126 | 127 | const fetchHeadless = async url => { 128 | console.log(' 🤹‍♂️ Loading data for %s from server with a headless browser', url); 129 | 130 | const browser = await puppeteer.launch(); 131 | const page = await browser.newPage(); 132 | 133 | await page.setUserAgent(CHROME_AGENT); 134 | await page.setViewport(DEFAULT_VIEWPORT); 135 | 136 | try { 137 | const response = await page.goto(url, { 138 | timeout: READ_TIMEOUT, 139 | waitUntil: 'networkidle2' 140 | }); 141 | 142 | if (response._status < 400) { 143 | await page.waitFor(RESPONSE_TIMEOUT); 144 | const html = await page.content(); 145 | browser.close(); 146 | return html; 147 | } 148 | console.log(' ❌ Got error %d trying to fetch %s headless', response._status, url); 149 | browser.close(); 150 | return null; 151 | } catch (err) { 152 | browser.close(); 153 | 154 | if (err.name === 'TimeoutError') { 155 | console.log(' ❌ Timed out trying to fetch %s headless', url); 156 | return null; 157 | } 158 | throw err; 159 | } 160 | }; 161 | 162 | /** 163 | * Fetch whatever is at the provided URL in headless mode with Puppeteer. Use cached version if available.
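 * Example (illustrative; example.com is a placeholder):
 *   const $ = await headless('https://example.com/page', '2020-3-16');
 *   const title = $('title').text();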
164 | * @param {*} url URL of the resource 165 | * @param {*} date the date associated with this resource, or false for timeseries data 166 | * @param {*} options customizable options: - alwaysRun: fetches from URL even if resource is in cache, defaults to false 167 | */ 168 | export const headless = async (url, date = process.env.SCRAPE_DATE || datetime.getYYYYMD(), options = {}) => { 169 | const { alwaysRun } = { alwaysRun: false, disableSSL: false, ...options }; 170 | 171 | const cachedBody = await caching.getCachedFile(url, 'html', date); 172 | 173 | if (cachedBody === caching.CACHE_MISS || alwaysRun) { 174 | const fetchedBody = await fetchHeadless(url); 175 | await caching.saveFileToCache(url, 'html', date, fetchedBody); 176 | 177 | const $ = await cheerio.load(fetchedBody); 178 | return $; 179 | } 180 | const $ = await cheerio.load(cachedBody); 181 | return $; 182 | }; 183 | -------------------------------------------------------------------------------- /timeseries.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import generate from './index.js'; 3 | import argv from './lib/cliArgs.js'; 4 | import * as transform from './lib/transform.js'; 5 | import * as datetime from './lib/datetime.js'; 6 | import * as fs from './lib/fs.js'; 7 | 8 | // The props to keep on a date object 9 | const caseDataProps = ['cases', 'deaths', 'recovered', 'active', 'tested', 'growthFactor']; 10 | 11 | let dates; 12 | 13 | /* 14 | Drop everything but case data from a location 15 | */ 16 | function stripInfo(location) { 17 | const newLocation = {}; 18 | for (const prop of caseDataProps) { 19 | if (location[prop] !== undefined) { 20 | newLocation[prop] = location[prop]; 21 | } 22 | } 23 | return newLocation; 24 | } 25 | 26 | /* 27 | Drop case data from a location 28 | */ 29 | function stripCases(location) { 30 | const newLocation = {}; 31 | for (const prop in location) { 32 | if (caseDataProps.indexOf(prop) === -1) { 33 | newLocation[prop] = location[prop]; 34 | } 35 | } 36 | return newLocation; 37 | } 38 | 39 | async function generateTidyCSV(timeseriesByLocation) { 40 | let columns = ['city', 'county', 'state', 'country', 'population', 'lat', 'long']; 41 | 42 | const csvData = []; 43 | for (const [, location] of Object.entries(timeseriesByLocation)) { 44 | // Build base row 45 | const row = []; 46 | for (const column of columns) { 47 | if (column === 'lat') { 48 | row.push(location.coordinates ? location.coordinates[1] : ''); 49 | } else if (column === 'long') { 50 | row.push(location.coordinates ?
location.coordinates[0] : ''); 51 | } else { 52 | row.push(location[column]); 53 | } 54 | } 55 | 56 | // For each date, add rows for each type 57 | for (const date of dates) { 58 | for (const type of caseDataProps) { 59 | if (location.dates[date] && location.dates[date][type] !== undefined) { 60 | const dateTypeRow = row.slice(); 61 | dateTypeRow.push(datetime.getYYYYMMDD(new Date(date))); 62 | dateTypeRow.push(type); 63 | dateTypeRow.push(location.dates[date][type]); 64 | csvData.push(dateTypeRow); 65 | } 66 | } 67 | } 68 | } 69 | 70 | columns = columns.concat(['date', 'type', 'value']); 71 | 72 | csvData.splice(0, 0, columns); 73 | 74 | return fs.writeCSV(path.join('dist', 'timeseries-tidy.csv'), csvData); 75 | } 76 | 77 | async function generateLessTidyCSV(timeseriesByLocation) { 78 | let columns = ['city', 'county', 'state', 'country', 'population', 'lat', 'long', 'url']; 79 | 80 | const csvData = []; 81 | for (const [, location] of Object.entries(timeseriesByLocation)) { 82 | // Build base row 83 | const row = []; 84 | for (const column of columns) { 85 | if (column === 'lat') { 86 | row.push(location.coordinates ? location.coordinates[1] : ''); 87 | } else if (column === 'long') { 88 | row.push(location.coordinates ? location.coordinates[0] : ''); 89 | } else { 90 | row.push(location[column]); 91 | } 92 | } 93 | 94 | // For each date, add a row 95 | for (const date of dates) { 96 | const dateRow = row.slice(); 97 | let hasData = false; 98 | for (const type of caseDataProps) { 99 | if (location.dates[date] && location.dates[date][type]) { 100 | hasData = true; 101 | } 102 | dateRow.push((location.dates[date] && location.dates[date][type]) || ''); 103 | } 104 | if (hasData) { 105 | dateRow.push(datetime.getYYYYMMDD(new Date(date))); 106 | csvData.push(dateRow); 107 | } 108 | } 109 | } 110 | 111 | columns = columns.concat(caseDataProps).concat(['date']); 112 | 113 | csvData.splice(0, 0, columns); 114 | 115 | return fs.writeCSV(path.join('dist', 'timeseries.csv'), csvData); 116 | } 117 | 118 | async function generateCSV(timeseriesByLocation) { 119 | let columns = ['city', 'county', 'state', 'country', 'lat', 'long', 'population', 'type', 'value', 'url']; 120 | 121 | const csvData = []; 122 | for (const [, location] of Object.entries(timeseriesByLocation)) { 123 | const row = []; 124 | for (const column of columns) { 125 | if (column === 'lat') { 126 | row.push(location.coordinates ? location.coordinates[1] : ''); 127 | } else if (column === 'long') { 128 | row.push(location.coordinates ? location.coordinates[0] : ''); 129 | } else { 130 | row.push(location[column]); 131 | } 132 | } 133 | 134 | for (const date of dates) { 135 | row.push(location.dates[date] ? location.dates[date].cases : ''); 136 | } 137 | 138 | csvData.push(row); 139 | } 140 | 141 | columns = columns.concat(dates); 142 | csvData.splice(0, 0, columns); 143 | 144 | return fs.writeCSV(path.join('dist', 'timeseries-jhu.csv'), csvData); 145 | } 146 | 147 | function getGrowthfactor(casesToday, casesYesterday) { 148 | const growthFactor = casesToday / casesYesterday; 149 | if (growthFactor === Infinity) { 150 | return null; 151 | } 152 | return growthFactor; 153 | } 154 | 155 | /* 156 | Generate timeseries data 157 | */ 158 | async function generateTimeseries(options = {}) { 159 | // Generate a list of dates starting at the first date, OR the provided start date 160 | // ending at today or the provided end date 161 | dates = []; 162 | const today = new Date(); 163 | const endDate = options.endDate ? 
new Date(options.endDate) : today; 164 | let curDate = new Date('2020-1-22'); 165 | if (options.date) { 166 | curDate = new Date(options.date); 167 | } 168 | while (curDate <= endDate) { 169 | dates.push(datetime.getYYYYMD(curDate)); 170 | curDate.setDate(curDate.getDate() + 1); 171 | } 172 | 173 | const timeseriesByLocation = {}; 174 | let previousDate = null; 175 | const lastDate = dates[dates.length - 1]; 176 | let featureCollection; 177 | for (const date of dates) { 178 | const data = await generate(date === datetime.getYYYYMD(today) ? undefined : date, { 179 | findFeatures: date === lastDate, 180 | findPopulations: date === lastDate, 181 | writeData: false, 182 | ...options 183 | }); 184 | 185 | if (date === lastDate) { 186 | featureCollection = data.featureCollection; 187 | } 188 | 189 | for (const location of data.locations) { 190 | const name = transform.getName(location); 191 | 192 | timeseriesByLocation[name] = { dates: {}, ...timeseriesByLocation[name], ...stripCases(location) }; 193 | 194 | const strippedLocation = stripInfo(location); 195 | 196 | // Add growth factor 197 | if (previousDate && timeseriesByLocation[name].dates[previousDate]) { 198 | strippedLocation.growthFactor = getGrowthfactor(strippedLocation.cases, timeseriesByLocation[name].dates[previousDate].cases); 199 | } 200 | 201 | timeseriesByLocation[name].dates[date] = strippedLocation; 202 | } 203 | 204 | previousDate = date; 205 | } 206 | 207 | await fs.writeJSON(path.join('dist', 'timeseries-byLocation.json'), timeseriesByLocation); 208 | await fs.writeJSON(path.join('dist', 'features.json'), featureCollection); 209 | 210 | const { locations, timeseriesByDate } = transform.transposeTimeseries(timeseriesByLocation); 211 | await fs.writeFile(path.join('dist', `timeseries.json`), JSON.stringify(timeseriesByDate, null, 2)); 212 | await fs.writeFile(path.join('dist', `locations.json`), JSON.stringify(locations, null, 2)); 213 | 214 | await generateCSV(timeseriesByLocation); 215 | 216 | await generateTidyCSV(timeseriesByLocation); 217 | 218 | await generateLessTidyCSV(timeseriesByLocation); 219 | } 220 | 221 | generateTimeseries(argv); 222 | -------------------------------------------------------------------------------- /tasks/findFeatures.js: -------------------------------------------------------------------------------- 1 | import turf from '@turf/turf'; 2 | 3 | import * as fs from '../lib/fs.js'; 4 | import * as transform from '../lib/transform.js'; 5 | 6 | const DEBUG = false; 7 | 8 | function cleanProps(obj) { 9 | if (obj.wikipedia === -99) { 10 | delete obj.wikipedia; 11 | } 12 | 13 | for (const prop in obj) { 14 | if (typeof obj[prop] === 'string' && obj[prop].trim() === '') { 15 | delete obj[prop]; 16 | } 17 | } 18 | 19 | return obj; 20 | } 21 | 22 | function takeOnlyProps(obj, props) { 23 | const newObj = {}; 24 | for (const prop of props) { 25 | if (typeof obj[prop] !== 'undefined') { 26 | newObj[prop] = obj[prop]; 27 | } 28 | } 29 | return newObj; 30 | } 31 | 32 | function normalizeProps(obj) { 33 | const newObj = {}; 34 | // eslint-disable-next-line guard-for-in 35 | for (const prop in obj) { 36 | newObj[prop.toLowerCase()] = obj[prop]; 37 | } 38 | return newObj; 39 | } 40 | 41 | const props = ['name', 'name_en', 'abbrev', 'region', 'admin', 'postal', 'gu_a3', 'adm0_a3', 'geonunit', 'pop_est', 'pop_year', 'gdp_md_est', 'gdp_year', 'iso_a2', 'iso_3166_2', 'type_en', 'wikipedia']; 42 | 43 | const locationTransforms = { 44 | // 🇭🇰 45 | 'Hong Kong': location => { 46 | location.country = 'HKG'; 47 | delete location.state; 48 |
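// e.g. { state: 'Hong Kong', country: 'China' } becomes { country: 'HKG' } (illustrative)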
}, 49 | 50 | Macau: location => { 51 | location.country = 'MAC'; 52 | delete location.state; 53 | }, 54 | 55 | // Why is this in Denmark? 56 | 'Faroe Islands': location => { 57 | location.country = 'Faroe Islands'; 58 | delete location.state; 59 | } 60 | }; 61 | 62 | function cleanFeatures(set) { 63 | for (const feature of set.features) { 64 | feature.properties = cleanProps(takeOnlyProps(normalizeProps(feature.properties), props)); 65 | } 66 | } 67 | 68 | const generateFeatures = ({ locations, report, options, sourceRatings }) => { 69 | const featureCollection = { 70 | type: 'FeatureCollection', 71 | features: [] 72 | }; 73 | 74 | let foundCount = 0; 75 | 76 | function storeFeature(feature, location) { 77 | let index = featureCollection.features.indexOf(feature); 78 | if (index === -1) { 79 | index = featureCollection.features.push(feature) - 1; 80 | if (feature.properties.geonunit) { 81 | feature.properties.shortName = feature.properties.name; 82 | feature.properties.name = `${feature.properties.name}, ${feature.properties.geonunit}`; 83 | } 84 | } 85 | 86 | // Store coordinates on location 87 | if (feature.geometry) { 88 | location.coordinates = turf.center(feature).geometry.coordinates; 89 | } 90 | 91 | if (DEBUG) { 92 | console.log('Storing %s in %s', location.name, feature.properties.name); 93 | } 94 | 95 | feature.properties.id = index; 96 | location.featureId = index; 97 | foundCount++; 98 | } 99 | 100 | return new Promise(async resolve => { 101 | console.log('⏳ Generating features...'); 102 | 103 | const countryData = await fs.readJSON('./coronavirus-data-sources/geojson/world-countries.json'); 104 | const usCountyData = await fs.readJSON('./coronavirus-data-sources/geojson/usa-counties.json'); 105 | const itaRegionsData = await fs.readJSON('./coronavirus-data-sources/geojson/ita-regions.json'); 106 | const provinceData = await fs.readJSON('./coronavirus-data-sources/geojson/world-states-provinces.json'); 107 | 108 | provinceData.features = itaRegionsData.features.concat(provinceData.features); 109 | 110 | // Clean and normalize data first 111 | cleanFeatures(countryData); 112 | cleanFeatures(provinceData); 113 | 114 | const errors = []; 115 | 116 | locationLoop: for (const location of locations) { 117 | let found = false; 118 | let point; 119 | if (location.coordinates) { 120 | point = turf.point(location.coordinates); 121 | } 122 | 123 | // Breaks France 124 | if (location.country === 'REU' || location.country === 'MTQ' || location.country === 'GUF') { 125 | console.warn(' ⚠️ Skipping %s because it breaks France', transform.getName(location)); 126 | continue; 127 | } 128 | 129 | if (location.county === '(unassigned)') { 130 | console.warn(" ⚠️ Skipping %s because it's unassigned", transform.getName(location)); 131 | continue; 132 | } 133 | 134 | // Apply transforms 135 | if (locationTransforms[location.state]) { 136 | locationTransforms[location.state](location); 137 | } 138 | 139 | if (location.state || location.county) { 140 | if (location.country === 'USA') { 141 | if (location.county) { 142 | // Find county 143 | for (const feature of usCountyData.features) { 144 | if (!location.county) { 145 | continue; 146 | } 147 | if (feature.properties.name === `${location.county.replace('Parish', 'County')}, ${location.state}`) { 148 | found = true; 149 | storeFeature(feature, location); 150 | continue locationLoop; 151 | } 152 | if (point && feature.geometry) { 153 | const poly = turf.feature(feature.geometry); 154 | if (turf.booleanPointInPolygon(point, poly)) { 155 | found = true; 
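// Matched by point-in-polygon; record the county feature and move on to the next location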
156 | storeFeature(feature, location); 157 | continue locationLoop; 158 | } 159 | } 160 | } 161 | } else if (location.state) { 162 | for (const feature of provinceData.features) { 163 | if (location.state === feature.properties.postal && feature.properties.adm0_a3 === 'USA') { 164 | found = true; 165 | storeFeature(feature, location); 166 | continue locationLoop; 167 | } 168 | } 169 | } 170 | } else { 171 | // Check if the location exists within our provinces 172 | for (const feature of provinceData.features) { 173 | const countryMatches = location.country === feature.properties.gu_a3 || location.country === feature.properties.adm0_a3; 174 | const stateMatches = location.state && (location.state === feature.properties.name || location.state === feature.properties.name_en || location.state === feature.properties.region); 175 | const countyMatches = location.county && (location.county === feature.properties.name || location.county === feature.properties.name_en || location.county === feature.properties.region); 176 | if (countryMatches && (stateMatches || countyMatches)) { 177 | found = true; 178 | storeFeature(feature, location); 179 | break; 180 | } 181 | 182 | if (point && feature.geometry) { 183 | const poly = turf.feature(feature.geometry); 184 | if (turf.booleanPointInPolygon(point, poly)) { 185 | found = true; 186 | storeFeature(feature, location); 187 | break; 188 | } 189 | } 190 | 191 | // Match alternate names 192 | // No known location, but might be useful in the future 193 | if (feature.properties.alt && feature.properties.alt.split('|').indexOf(location.state) !== -1) { 194 | found = true; 195 | storeFeature(feature, location); 196 | break; 197 | } 198 | if (feature.properties.region === location.state && feature.properties.admin === location.country) { 199 | found = true; 200 | storeFeature(feature, location); 201 | break; 202 | } 203 | } 204 | } 205 | } else { 206 | // Check if the location exists within our countries 207 | for (const feature of countryData.features) { 208 | // Find by full name 209 | if (location.country === feature.properties.adm0_a3 || location.country === feature.properties.gu_a3) { 210 | found = true; 211 | storeFeature(feature, location); 212 | break; 213 | } 214 | 215 | // Find by abbreviation 216 | if (feature.properties.abbrev && feature.properties.abbrev.replace(/\./g, '') === location.country) { 217 | found = true; 218 | storeFeature(feature, location); 219 | break; 220 | } 221 | 222 | if (point && feature.geometry) { 223 | const poly = turf.feature(feature.geometry); 224 | 225 | if (turf.booleanPointInPolygon(point, poly)) { 226 | found = true; 227 | storeFeature(feature, location); 228 | break; 229 | } 230 | } 231 | } 232 | 233 | // Check by province as a last resort 234 | if (!found) { 235 | // Check within provinces 236 | for (const feature of provinceData.features) { 237 | if (location.country === feature.properties.name) { 238 | found = true; 239 | storeFeature(feature, location); 240 | break; 241 | } 242 | 243 | // Find by geonunit 244 | if (feature.properties.geonunit === location.country) { 245 | found = true; 246 | storeFeature(feature, location); 247 | break; 248 | } 249 | 250 | if (point && feature.geometry) { 251 | const poly = turf.feature(feature.geometry); 252 | 253 | if (turf.booleanPointInPolygon(point, poly)) { 254 | found = true; 255 | storeFeature(feature, location); 256 | break; 257 | } 258 | } 259 | } 260 | } 261 | } 262 | 263 | if (!found) { 264 | console.error(' ❌ Could not find location %s', transform.getName(location)); 
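// Track unmatched locations; they are surfaced below as report.findFeatures.missingFeatures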
265 | errors.push(transform.getName(location)); 266 | } 267 | } 268 | 269 | console.log('✅ Found features for %d out of %d regions for a total of %d features', foundCount, Object.keys(locations).length, featureCollection.features.length); 270 | 271 | report.findFeatures = { 272 | numFeaturesFound: foundCount, 273 | missingFeatures: errors 274 | }; 275 | 276 | resolve({ locations, featureCollection, report, options, sourceRatings }); 277 | }); 278 | }; 279 | 280 | export default generateFeatures; 281 | -------------------------------------------------------------------------------- /tasks/scrapeData.js: -------------------------------------------------------------------------------- 1 | import scrapers from '../scrapers.js'; 2 | import * as transform from '../lib/transform.js'; 3 | import * as datetime from '../lib/datetime.js'; 4 | import calculateRating from '../lib/rating.js'; 5 | 6 | const numericalValues = ['cases', 'tested', 'recovered', 'deaths', 'active']; 7 | 8 | const scraperVars = ['type', 'timeseries', 'headless', 'ssl', 'priority']; 9 | 10 | /* 11 | Returns a report if the crosscheck fails, or false if the two sets have identical data 12 | */ 13 | function crosscheck(a, b) { 14 | const crosscheckReport = {}; 15 | let failed = false; 16 | for (const prop of numericalValues) { 17 | if (a[prop] !== b[prop]) { 18 | crosscheckReport[prop] = [a[prop], b[prop]]; 19 | failed = true; 20 | } 21 | } 22 | return failed ? crosscheckReport : false; 23 | } 24 | 25 | /* 26 | Combine location information with the passed data object 27 | */ 28 | function addLocationToData(data, location) { 29 | Object.assign(data, location); 30 | 31 | delete data.scraper; 32 | 33 | // Add rating 34 | data.rating = calculateRating(data); 35 | 36 | // Store for usage in ratings 37 | data._scraperDefinition = location; 38 | 39 | return data; 40 | } 41 | 42 | /* 43 | Check if the provided data contains any invalid fields 44 | */ 45 | function isValid(data) { 46 | if (data.cases === undefined) { 47 | throw new Error(`Invalid data: contains no case data`); 48 | } 49 | 50 | for (const [prop, value] of Object.entries(data)) { 51 | if (value === null) { 52 | throw new Error(`Invalid data: ${prop} is null`); 53 | } 54 | if (Number.isNaN(value)) { 55 | throw new Error(`Invalid data: ${prop} is not a number`); 56 | } 57 | } 58 | 59 | for (const prop of numericalValues) { 60 | if (data[prop] !== undefined && typeof data[prop] !== 'number') { 61 | throw new Error(`Invalid data: ${prop} is not a number`); 62 | } 63 | } 64 | 65 | return true; 66 | } 67 | 68 | /* 69 | Remove "private" object properties 70 | */ 71 | function removePrivate(data) { 72 | for (const [prop, value] of Object.entries(data)) { 73 | if (value === '') { 74 | delete data[prop]; 75 | } 76 | // Remove "private" fields 77 | if (prop[0] === '_') { 78 | delete data[prop]; 79 | } 80 | } 81 | 82 | return data; 83 | } 84 | 85 | /* 86 | Clean the passed data 87 | */ 88 | function clean(data) { 89 | removePrivate(data); 90 | 91 | // Remove non-data vars 92 | for (const prop of scraperVars) { 93 | delete data[prop]; 94 | } 95 | 96 | return data; 97 | } 98 | 99 | /* 100 | Normalize the passed data 101 | */ 102 | function normalize(data) { 103 | // Normalize states 104 | if (data.country === 'USA') { 105 | data.state = transform.toUSStateAbbreviation(data.state); 106 | } 107 | 108 | // Normalize countries 109 | data.country = transform.toISO3166Alpha3(data.country); 110 | 111 | return data; 112 | } 113 | 114 | /* 115 | Add output data to the cases array.
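Each entry is validated with isValid() before it is pushed.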
Input can be either an object or an array 116 | */ 117 | function addData(cases, location, result) { 118 | if (Array.isArray(result)) { 119 | if (result.length === 0) { 120 | throw new Error(`Invalid data: scraper for ${transform.getName(location)} returned 0 rows`); 121 | } 122 | for (const data of result) { 123 | if (isValid(data, location)) { 124 | cases.push(addLocationToData(data, location)); 125 | } 126 | } 127 | } else if (isValid(result, location)) { 128 | cases.push(addLocationToData(result, location)); 129 | } 130 | } 131 | 132 | /* 133 | Run the correct scraper for this location 134 | */ 135 | function runScraper(location) { 136 | if (typeof location.scraper === 'function') { 137 | return location.scraper(); 138 | } 139 | if (typeof location.scraper === 'object') { 140 | // Find the closest date 141 | const targetDate = process.env.SCRAPE_DATE || datetime.getDate(); 142 | let scraperToUse = null; 143 | for (const [date, scraper] of Object.entries(location.scraper)) { 144 | if (datetime.dateIsBeforeOrEqualTo(date, targetDate)) { 145 | scraperToUse = scraper; 146 | } 147 | } 148 | if (scraperToUse === null) { 149 | throw new Error(`Could not find scraper for ${transform.getName(location)} at ${process.env.SCRAPE_DATE}, only have: ${Object.keys(location.scraper).join(', ')}`); 150 | } 151 | return scraperToUse.call(location); 152 | } 153 | 154 | throw new Error(`Why on earth is the scraper for ${transform.getName(location)} a ${typeof location.scraper}?`); 155 | } 156 | 157 | /* 158 | Begin the scraping process 159 | */ 160 | async function scrape(options) { 161 | const crosscheckReports = {}; 162 | const locations = []; 163 | const errors = []; 164 | for (const location of scrapers) { 165 | if (options.location) { 166 | if (transform.getName(location) !== options.location) { 167 | continue; 168 | } 169 | } 170 | if (options.skip) { 171 | if (transform.getName(location) === options.skip) { 172 | continue; 173 | } 174 | } 175 | if (location.scraper) { 176 | try { 177 | addData(locations, location, await runScraper(location)); 178 | } catch (err) { 179 | console.error(' ❌ Error processing %s: ', transform.getName(location), err); 180 | 181 | errors.push({ 182 | name: transform.getName(location), 183 | url: location.url, 184 | err: err.toString() 185 | }); 186 | } 187 | } 188 | } 189 | 190 | // Normalize data 191 | for (const [index] of Object.entries(locations)) { 192 | const location = locations[index]; 193 | locations[index] = normalize(location); 194 | location.active = location.active === undefined || location.active === null ?
transform.getActiveFromLocation(location) : location.active; 195 | } 196 | 197 | // De-dupe data 198 | const seenLocations = {}; 199 | let i = locations.length; 200 | let deDuped = 0; 201 | while (i-- > 0) { 202 | const location = locations[i]; 203 | const locationName = transform.getName(location); 204 | const otherLocation = seenLocations[locationName]; 205 | 206 | if (otherLocation) { 207 | // Take rating into account to break ties 208 | const thisPriority = transform.getPriority(location) + location.rating / 2; 209 | const otherPriority = transform.getPriority(otherLocation) + otherLocation.rating / 2; 210 | 211 | if (otherPriority === thisPriority) { 212 | console.log('⚠️ %s: Equal priority sources, choosing %s (%d) over %s (%d) arbitrarily', locationName, location.url, thisPriority, otherLocation.url, otherPriority); 213 | // Kill the other location 214 | locations.splice(locations.indexOf(otherLocation), 1); 215 | deDuped++; 216 | } else if (otherPriority < thisPriority) { 217 | // Kill the other location 218 | console.log('✂️ %s: Using %s (%d) instead of %s (%d)', locationName, location.url, thisPriority, otherLocation.url, otherPriority); 219 | locations.splice(locations.indexOf(otherLocation), 1); 220 | deDuped++; 221 | } else { 222 | // Kill this location 223 | console.log('✂️ %s: Using %s (%d) instead of %s (%d)', locationName, otherLocation.url, otherPriority, location.url, thisPriority); 224 | locations.splice(i, 1); 225 | deDuped++; 226 | } 227 | 228 | const crosscheckReport = crosscheck(location, otherLocation); 229 | if (crosscheckReport) { 230 | console.log('🚨 Crosscheck failed for %s: %s (%d) has different data than %s (%d)', locationName, otherLocation.url, otherPriority, location.url, thisPriority); 231 | crosscheckReports[locationName] = crosscheckReports[locationName] || []; 232 | crosscheckReports[locationName].push(removePrivate(location)); 233 | crosscheckReports[locationName].push(removePrivate(otherLocation)); 234 | } 235 | } 236 | seenLocations[locationName] = location; 237 | } 238 | 239 | // Generate ratings 240 | const sourceProps = ['rating', 'city', 'county', 'state', 'country', 'type', 'timeseries', 'headless', 'aggregate', 'ssl', 'priority', 'url']; 241 | 242 | const sourcesByURL = {}; 243 | for (const location of locations) { 244 | const sourceObj = { ...location._scraperDefinition }; 245 | for (const prop of sourceProps) { 246 | if (location[prop] !== undefined) { 247 | sourceObj[prop] = location[prop]; 248 | } 249 | } 250 | for (const prop in sourceObj) { 251 | if (prop[0] === '_') { 252 | delete sourceObj[prop]; 253 | } 254 | } 255 | 256 | delete sourceObj.scraper; 257 | 258 | // Remove granularity from the data since this is a report on the scraper 259 | if (sourceObj.aggregate) { 260 | delete sourceObj[sourceObj.aggregate]; 261 | } 262 | 263 | sourcesByURL[location.url] = sourceObj; 264 | sourceObj.rating = calculateRating(sourceObj); 265 | } 266 | let sourceRatings = Object.values(sourcesByURL); 267 | sourceRatings = sourceRatings.sort((a, b) => { 268 | return b.rating - a.rating; 269 | }); 270 | 271 | // Clean data 272 | for (const [index] of Object.entries(locations)) { 273 | locations[index] = clean(locations[index]); 274 | } 275 | 276 | return { locations, errors, deDuped, sourceRatings, crosscheckReports }; 277 | } 278 | 279 | const scrapeData = async ({ report, options }) => { 280 | console.log(`⏳ Scraping data for ${process.env.SCRAPE_DATE ?
process.env.SCRAPE_DATE : 'today'}...`); 281 | 282 | const { locations, errors, deDuped, sourceRatings, crosscheckReports } = await scrape(options); 283 | 284 | const locationCounts = { 285 | cities: 0, 286 | states: 0, 287 | counties: 0, 288 | countries: 0 289 | }; 290 | const caseCounts = { 291 | cases: 0, 292 | tested: 0, 293 | recovered: 0, 294 | deaths: 0, 295 | active: 0 296 | }; 297 | for (const location of locations) { 298 | if (!location.state && !location.county) { 299 | locationCounts.countries++; 300 | } else if (!location.county) { 301 | locationCounts.states++; 302 | } else if (!location.city) { 303 | locationCounts.counties++; 304 | } else { 305 | locationCounts.cities++; 306 | } 307 | 308 | for (const type of Object.keys(caseCounts)) { 309 | if (location[type]) { 310 | caseCounts[type] += location[type]; 311 | } 312 | } 313 | } 314 | 315 | console.log('✅ Data scraped!'); 316 | for (const [name, count] of Object.entries(locationCounts)) { 317 | console.log(' - %d %s', count, name); 318 | } 319 | console.log('ℹ️ Total counts (tracked cases, may contain duplicates):'); 320 | for (const [name, count] of Object.entries(caseCounts)) { 321 | console.log(' - %d %s', count, name); 322 | } 323 | 324 | if (errors.length) { 325 | console.log('❌ %d error%s', errors.length, errors.length === 1 ? '' : 's'); 326 | } 327 | 328 | report.scrape = { 329 | numCountries: locationCounts.countries, 330 | numStates: locationCounts.states, 331 | numCounties: locationCounts.counties, 332 | numCities: locationCounts.cities, 333 | numDuplicates: deDuped, 334 | numErrors: errors.length, 335 | crosscheckReports, 336 | errors 337 | }; 338 | 339 | return { locations, report, options, sourceRatings }; 340 | }; 341 | 342 | export default scrapeData; 343 | -------------------------------------------------------------------------------- /lib/fs.test.js: -------------------------------------------------------------------------------- 1 | import each from 'jest-each'; 2 | import * as mockFs from './__test_utils__/fs.js'; 3 | 4 | import * as fs from './fs.js'; 5 | 6 | describe('fs', () => { 7 | describe('exists', () => { 8 | const tests = [['test/somefile'], ['test/asubdirectory/somefile'], ['somefile']]; 9 | 10 | each(tests).test('when given a path to an existing file, it returns true', async pathName => { 11 | mockFs.mock({ 12 | [pathName]: 'file content' 13 | }); 14 | 15 | expect(await fs.exists(pathName)).toBe(true); 16 | 17 | // Don't forget to restore! 18 | mockFs.restore(); 19 | }); 20 | 21 | each(tests).test('when given a path to a directory that exists, it returns true', async pathName => { 22 | mockFs.mock({ 23 | [pathName]: {} 24 | }); 25 | 26 | expect(await fs.exists(pathName)).toBe(true); 27 | 28 | // Don't forget to restore! 29 | mockFs.restore(); 30 | }); 31 | 32 | each(tests).test('when given a path to a file that does not exist, it returns false', async pathName => { 33 | mockFs.mock({}); 34 | 35 | expect(await fs.exists(pathName)).toBe(false); 36 | 37 | // Don't forget to restore! 38 | mockFs.restore(); 39 | }); 40 | }); 41 | 42 | describe('ensureDir', () => { 43 | const tests = [['test/somedir'], ['test/asubdirectory'], ['somedir']]; 44 | 45 | each(tests).test('when given a path with missing directories, it creates the directories', async pathName => { 46 | mockFs.mock({}); 47 | 48 | await fs.ensureDir(pathName); 49 | 50 | expect(await fs.exists(pathName)).toBe(true); 51 | 52 | // Don't forget to restore! 
53 | mockFs.restore(); 54 | }); 55 | }); 56 | 57 | describe('readFile', () => { 58 | test('when given an existing file path, it returns the file content', async () => { 59 | const filePath = 'test/file.txt'; 60 | const fileContent = 'some content'; 61 | 62 | mockFs.mock({ 63 | [filePath]: fileContent 64 | }); 65 | 66 | expect(await fs.readFile(filePath)).toBe(fileContent); 67 | 68 | // Don't forget to restore! 69 | mockFs.restore(); 70 | }); 71 | 72 | test('when given a file path that does not exist, it throws an ENOENT', async () => { 73 | const filePath = 'test/file.txt'; 74 | 75 | mockFs.mock({}); 76 | 77 | try { 78 | await fs.readFile(filePath); 79 | } catch (e) { 80 | expect(e.toString()).toMatch('ENOENT'); 81 | // Don't forget to restore! 82 | mockFs.restore(); 83 | return; 84 | } 85 | 86 | // Don't forget to restore! 87 | mockFs.restore(); 88 | 89 | throw new Error('Test did not fail'); 90 | }); 91 | }); 92 | 93 | describe('getFilesInDir', () => { 94 | test('when given a directory with files, it returns the file paths', async () => { 95 | const directory = 'test'; 96 | const directoryFiles = ['test1.html', 'test2.csv']; 97 | 98 | mockFs.mock({ 99 | [directory]: directoryFiles.reduce((dirContent, filePath) => { 100 | dirContent[filePath] = 'some content'; 101 | return dirContent; 102 | }, {}) 103 | }); 104 | 105 | expect(await fs.getFilesInDir(directory)).toStrictEqual(directoryFiles); 106 | 107 | // Don't forget to restore! 108 | mockFs.restore(); 109 | }); 110 | 111 | test('when given a directory path that does not exist, it throws ENOENT', async () => { 112 | mockFs.mock({}); 113 | 114 | try { 115 | await fs.getFilesInDir('testdir'); 116 | } catch (e) { 117 | expect(e.toString()).toMatch('ENOENT'); 118 | // Don't forget to restore! 119 | mockFs.restore(); 120 | return; 121 | } 122 | 123 | // Don't forget to restore! 124 | mockFs.restore(); 125 | 126 | throw new Error('Test did not fail'); 127 | }); 128 | }); 129 | 130 | describe('readJSON', () => { 131 | test('when given an existing JSON file, it returns the file content', async () => { 132 | const filePath = 'test/file.json'; 133 | const jsonContent = { test: 'it works!' }; 134 | const fileContent = JSON.stringify(jsonContent); 135 | 136 | mockFs.mock({ 137 | [filePath]: fileContent 138 | }); 139 | 140 | expect(await fs.readJSON(filePath)).toStrictEqual(jsonContent); 141 | 142 | // Don't forget to restore! 143 | mockFs.restore(); 144 | }); 145 | 146 | test('when given a file path that does not exist, it throws an ENOENT', async () => { 147 | const filePath = 'test/file.json'; 148 | 149 | mockFs.mock({}); 150 | 151 | try { 152 | await fs.readJSON(filePath); 153 | } catch (e) { 154 | expect(e.toString()).toMatch('ENOENT'); 155 | // Don't forget to restore! 156 | mockFs.restore(); 157 | return; 158 | } 159 | 160 | // Don't forget to restore!
161 | mockFs.restore(); 162 | 163 | throw new Error('Test did not fail'); 164 | }); 165 | }); 166 | 167 | describe('readCSV', () => { 168 | test('when given an existing CSV file, it returns the file content', async () => { 169 | const filePath = 'test/file.csv'; 170 | 171 | const csvContent = ` 172 | title1,title2,title3 173 | one,two,three 174 | example1,example2,example3`; 175 | 176 | const expected = [ 177 | { title1: 'one', title2: 'two', title3: 'three' }, 178 | { title1: 'example1', title2: 'example2', title3: 'example3' } 179 | ]; 180 | 181 | mockFs.mock({ 182 | [filePath]: csvContent 183 | }); 184 | 185 | expect(await fs.readCSV(filePath)).toStrictEqual(expected); 186 | 187 | // Don't forget to restore! 188 | mockFs.restore(); 189 | }); 190 | 191 | test('when given a file path that does not exist, it throws ENOENT', async () => { 192 | const filePath = 'test/file.csv'; 193 | 194 | mockFs.mock({}); 195 | 196 | try { 197 | await fs.readCSV(filePath); 198 | } catch (e) { 199 | expect(e.toString()).toMatch('ENOENT'); 200 | // Don't forget to restore! 201 | mockFs.restore(); 202 | return; 203 | } 204 | 205 | // Don't forget to restore! 206 | mockFs.restore(); 207 | 208 | throw new Error('Test did not fail'); 209 | }); 210 | }); 211 | 212 | describe('writeFile', () => { 213 | test('when given a file path and data, it writes to a file', async () => { 214 | const filePath = 'file.txt'; 215 | const fileContent = 'some file content!'; 216 | 217 | mockFs.mock({}); 218 | 219 | await fs.writeFile(filePath, fileContent); 220 | 221 | expect(await fs.readFile(filePath)).toBe(fileContent); 222 | 223 | // Don't forget to restore! 224 | mockFs.restore(); 225 | }); 226 | 227 | test('when given a file path with a missing directory and ensureDir = false, it throws ENOENT', async () => { 228 | const filePath = 'test/file.txt'; 229 | const fileContent = 'some file content!'; 230 | 231 | mockFs.mock({}); 232 | 233 | try { 234 | await fs.writeFile(filePath, fileContent, { ensureDir: false }); 235 | } catch (e) { 236 | expect(e.toString()).toMatch('ENOENT'); 237 | // Don't forget to restore! 238 | mockFs.restore(); 239 | return; 240 | } 241 | 242 | // Don't forget to restore! 243 | mockFs.restore(); 244 | 245 | throw new Error('Test did not fail'); 246 | }); 247 | 248 | test('when given a file path with missing directories and ensureDir = true, it writes to a file and creates directories', async () => { 249 | const filePath = 'test/file.txt'; 250 | const fileContent = 'some file content!'; 251 | 252 | mockFs.mock({}); 253 | 254 | await fs.writeFile(filePath, fileContent, { ensureDir: true }); 255 | 256 | expect(await fs.readFile(filePath)).toBe(fileContent); 257 | 258 | // Don't forget to restore! 259 | mockFs.restore(); 260 | }); 261 | }); 262 | 263 | describe('writeJSON', () => { 264 | test('when given a file path and JSON data, it writes to a file', async () => { 265 | const filePath = 'file.json'; 266 | const fileContent = { test: 'some data' }; 267 | 268 | mockFs.mock({}); 269 | 270 | await fs.writeJSON(filePath, fileContent); 271 | 272 | expect(await fs.readJSON(filePath)).toStrictEqual(fileContent); 273 | 274 | // Don't forget to restore!
275 | mockFs.restore(); 276 | }); 277 | 278 | test('when given a file path with a missing directory and ensureDir = false, it throws ENOENT', async () => { 279 | const filePath = 'test/file.json'; 280 | const fileContent = { test: 'some data' }; 281 | 282 | mockFs.mock({}); 283 | 284 | try { 285 | await fs.writeJSON(filePath, fileContent, { ensureDir: false }); 286 | } catch (e) { 287 | expect(e.toString()).toMatch('ENOENT'); 288 | // Don't forget to restore! 289 | mockFs.restore(); 290 | return; 291 | } 292 | 293 | // Don't forget to restore! 294 | mockFs.restore(); 295 | 296 | throw new Error('Test did not fail'); 297 | }); 298 | 299 | test('when given a file path with missing directories and ensureDir = true, it writes to a file and creates directories', async () => { 300 | const filePath = 'test/file.json'; 301 | const fileContent = { test: 'some data' }; 302 | 303 | mockFs.mock({}); 304 | 305 | await fs.writeJSON(filePath, fileContent, { ensureDir: true }); 306 | 307 | expect(await fs.readJSON(filePath)).toStrictEqual(fileContent); 308 | 309 | // Don't forget to restore! 310 | mockFs.restore(); 311 | }); 312 | }); 313 | 314 | describe('writeCSV', () => { 315 | test('when given a file path and CSV data, it writes to a file', async () => { 316 | const filePath = 'test/file.csv'; 317 | const fileContent = [ 318 | { title1: 'title1', title2: 'title2', title3: 'title3' }, 319 | { title1: 'one', title2: 'two', title3: 'three' }, 320 | { title1: 'example1', title2: 'example2', title3: 'example3' } 321 | ]; 322 | 323 | const csvContent = `title1,title2,title3 324 | one,two,three 325 | example1,example2,example3 326 | `; 327 | 328 | mockFs.mock({}); 329 | 330 | await fs.writeCSV(filePath, fileContent); 331 | 332 | expect(await fs.readFile(filePath)).toStrictEqual(csvContent); 333 | 334 | // Don't forget to restore! 335 | mockFs.restore(); 336 | }); 337 | 338 | test('when given a file path with a missing directory and ensureDir = false, it throws ENOENT', async () => { 339 | const filePath = 'test/file.csv'; 340 | const fileContent = [ 341 | { title1: 'one', title2: 'two', title3: 'three' }, 342 | { title1: 'example1', title2: 'example2', title3: 'example3' } 343 | ]; 344 | 345 | mockFs.mock({}); 346 | 347 | try { 348 | await fs.writeCSV(filePath, fileContent, { ensureDir: false }); 349 | } catch (e) { 350 | expect(e.toString()).toMatch('ENOENT'); 351 | // Don't forget to restore! 352 | mockFs.restore(); 353 | return; 354 | } 355 | 356 | // Don't forget to restore! 357 | mockFs.restore(); 358 | 359 | throw new Error('Test did not fail'); 360 | }); 361 | 362 | test('when given a file path with missing directories and ensureDir = true, it writes to a file and creates directories', async () => { 363 | const filePath = 'test/file.csv'; 364 | const fileContent = [ 365 | { title1: 'title1', title2: 'title2', title3: 'title3' }, 366 | { title1: 'one', title2: 'two', title3: 'three' }, 367 | { title1: 'example1', title2: 'example2', title3: 'example3' } 368 | ]; 369 | 370 | const csvContent = `title1,title2,title3 371 | one,two,three 372 | example1,example2,example3 373 | `; 374 | 375 | mockFs.mock({}); 376 | 377 | await fs.writeCSV(filePath, fileContent, { ensureDir: true }); 378 | 379 | expect(await fs.readFile(filePath)).toStrictEqual(csvContent); 380 | 381 | // Don't forget to restore!
382 | mockFs.restore(); 383 | }); 384 | }); 385 | }); 386 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # coronadatascraper 2 | > A scraper that pulls coronavirus case data from verified sources. 3 | 4 | This project exists to pull county-level data for COVID-19 from verified, high-quality sources. 5 | 6 | Every piece of data produced includes the URL where the data was sourced from as well as a rating of the source's technical quality (completeness, machine readability, best practices -- not accuracy). 7 | 8 | ## Where's the data? 9 | 10 | https://coronadatascraper.com/ 11 | 12 | ## Running the scraper 13 | 14 | First, [fork the repository](https://github.com/lazd/coronadatascraper/fork) so you're ready to contribute back. 15 | 16 | Before following these instructions, install [yarn](https://classic.yarnpkg.com/en/docs/install/). 17 | 18 | #### 1. Clone, init submodules, and add upstream 19 | 20 | Replace `yourusername` below with your Github username: 21 | 22 | ``` 23 | git clone --recursive git@github.com:yourusername/coronadatascraper.git 24 | cd coronadatascraper 25 | git remote add upstream git@github.com:lazd/coronadatascraper.git 26 | ``` 27 | 28 | If you've already cloned without `--recursive`, run: 29 | 30 | ``` 31 | git submodule init 32 | git submodule update 33 | ``` 34 | 35 | #### 2. Install dependencies 36 | 37 | ``` 38 | yarn install 39 | ``` 40 | 41 | #### 3. Run the scraper 42 | 43 | ``` 44 | yarn start 45 | ``` 46 | 47 | #### 4. Pull from upstream often 48 | 49 | This gets you the latest scrapers, as well as the cache so we're not hammering servers. 50 | 51 | ``` 52 | git pull upstream master --recurse-submodules 53 | ``` 54 | 55 | Note: If you encounter issues updating a submodule, such as `Could not access submodule`, you may need to update your fork using: 56 | ``` 57 | git submodule update --init --recursive 58 | ``` 59 | 60 | ### Re-generating old data 61 | 62 | To re-generate old data from cache (or timeseries), run: 63 | 64 | ``` 65 | yarn start --date=2020-3-12 66 | ``` 67 | 68 | To output files without the date suffix, use: 69 | 70 | ``` 71 | yarn start --date=2020-3-12 -o 72 | ``` 73 | 74 | ### Generating timeseries data 75 | 76 | To generate timeseries data in `dist/timeseries*.*`, run: 77 | 78 | ``` 79 | yarn timeseries 80 | ``` 81 | 82 | ### Run only one scraper 83 | 84 | To scrape just one location, use `--location`/`-l`: 85 | 86 | ``` 87 | yarn start --location "Ventura County, CA, USA" 88 | ``` 89 | 90 | ### Skipping a scraper 91 | 92 | To skip a scraper, use `--skip`/`-s`: 93 | 94 | ``` 95 | yarn start --skip "Ventura County, CA, USA" 96 | ``` 97 | 98 | ### Building the website 99 | 100 | To build the website and all data into `dist/`: 101 | 102 | ``` 103 | yarn build 104 | ``` 105 | 106 | ## Contributing 107 | 108 | Contributions for any place in the world are welcome. See the [community-curated list of verified data sources](https://docs.google.com/spreadsheets/d/1T2cSvWvUvurnOuNFj2AMPGLpuR2yVs3-jdd_urfWU4c/edit#gid=0) to find a new data source to add, and be sure to update the "Scraped?" column when you do. 109 | 110 | Write clean and clear code, and please follow the criteria for sources below. Send a pull request with your scraper, and be sure to run the scraper first, following the instructions above, to make sure the data is valid.
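For example, using the `--location` flag documented above, you can run just your new scraper in isolation and eyeball its output before opening the pull request (the location name below is a placeholder for your own):

```
yarn start --location "Your County, XX, USA"
```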
111 | 112 | ### Writing a scraper 113 | 114 | Scrapers can pull JSON, CSV, or good ol' HTML down and are written in a sort of modular way, with a handful of helpers available to clean up the data. Scrapers can pull in data for anything -- cities, counties, states, countries, or collections thereof. See the existing scrapers for ideas on how to deal with different ways of data being presented. 115 | 116 | Start by opening up `scrapers.js` and adding a new object at the top of the array. 117 | 118 | Make sure you have the following properties: 119 | 120 | * `url` - The source of the data 121 | * `scraper` - An async function that scrapes data and returns objects, defined below 122 | 123 | Add the following directly to the scraper object if the data you're pulling in is specific to a given location: 124 | 125 | * `city` - The city name 126 | * `county` - The county or parish 127 | * `state` - The state, province, or region 128 | * `country` - [ISO 3166-1 alpha-3 country code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) 129 | * `type` - one of `json`, `csv`, `table`, `list`, `paragraph`, `pdf`, `image`. Assumes `list` if `undefined`. 130 | * `timeseries` - `true` if this source provides timeseries data, `false` or `undefined` if it only provides the latest data 131 | * `headless` - whether this source requires a headless browser to scrape 132 | * `ssl` - `true` or `undefined` if this host has a valid SSL certificate chain, `false` if not 133 | * `priority` - any number (negative or positive). `0` is default, higher priority wins if duplicate data is present, ties are broken by rating 134 | 135 | Your scraper should return a `data` object, or an array of objects, with some of the following information: 136 | 137 | * `city` - The city name (not required if defined on scraper object) 138 | * `county` - The county or parish (not required if defined on scraper object) 139 | * `state` - The state, province, or region (not required if defined on scraper object) 140 | * `country` - [ISO 3166-1 alpha-3 country code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) 142 | * `cases` - Total number of cases 143 | * `deaths` - Total number of deaths 144 | * `recovered` - Total number recovered 145 | * `tested` - Total number tested 146 | * `population` - The estimated population of the location 147 | * `coordinates` - Array of coordinates [longitude, latitude] 148 | 149 | Everything defined on the scraper object except the `scraper` function and properties that start with `_` will be added to the objects returned by your scraper. 150 | 151 | #### Sample scraper 152 | 153 | Here's the scraper for Indiana that gets data from a CSV: 154 | 155 | ```javascript 156 | { 157 | url: 'https://opendata.arcgis.com/datasets/d14de7e28b0448ab82eb36d6f25b1ea1_0.csv', 158 | country: 'USA', 159 | state: 'IN', 160 | scraper: async function() { 161 | let data = await fetch.csv(this.url); 162 | 163 | let counties = []; 164 | for (let county of data) { 165 | counties.push({ 166 | county: parse.string(county.COUNTYNAME) + ' County', 167 | cases: parse.number(county.Total_Positive), 168 | deaths: parse.number(county.Total_Deaths), 169 | tested: parse.number(county.Total_Tested) 170 | }); 171 | } 172 | 173 | return counties; 174 | } 175 | }, 176 | ``` 177 | 178 | You can see that `country` and `state` are already defined on the object, and all the scraper has to do is pull down the CSV and return an array of objects.
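If a source only reports county rows, you can also append a state-level rollup before returning. Here's a minimal sketch using `transform.sumData`, the same helper the Louisiana example further below uses to add state data:

```javascript
// At the end of a county-level scraper: sum the numeric totals
// across the county objects into a single state-level entry.
counties.push(transform.sumData(counties));
return counties;
```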
179 | 180 | Here's the scraper for Oregon that pulls data from an HTML table: 181 | ```javascript 182 | { 183 | state: 'OR', 184 | country: 'USA', 185 | url: 'https://www.oregon.gov/oha/PH/DISEASESCONDITIONS/DISEASESAZ/Pages/emerging-respiratory-infections.aspx', 186 | scraper: async function() { 187 | let counties = []; 188 | let $ = await fetch.page(this.url); 189 | 190 | let $table = $('table[summary="Cases by County in Oregon for COVID-19"]'); 191 | 192 | let $trs = $table.find('tbody > tr:not(:first-child):not(:last-child)'); 193 | 194 | $trs.each((index, tr) => { 195 | let $tr = $(tr); 196 | let county = parse.string($tr.find('td:first-child').text()) + ' County'; 197 | let cases = parse.number($tr.find('td:nth-child(2)').text()); 198 | counties.push({ 199 | county: county, 200 | cases: cases 201 | }); 202 | }); 203 | 204 | return counties; 205 | } 206 | }, 207 | ``` 208 | 209 | It first finds the table with the `[summary]` attribute, then iterates over each of the rows extracting county names and cases (skipping the first and last rows), and finally returns an array of objects. 210 | 211 | #### Library functions 212 | 213 | See [library functions](lib/) for the API of the available library/utility functions you can use in your scraper. 214 | 215 | Of course, if something is missing, `yarn add` it as a dependency and `import` it! 216 | 217 | #### Making sure your scraper doesn't break 218 | 219 | It's a tough challenge to write scrapers that will work when websites are inevitably updated. Here are some tips: 220 | 221 | * Write your scraper so it handles aggregate data with a single scraper entry (e.g. find a table, process the table) 222 | * Try not to hardcode county or city names; instead, let the data on the page populate that 223 | * Try to make your scraper less brittle by avoiding generated class names (e.g. CSS modules) 224 | * When targeting elements, don't assume order will be the same (e.g. if there are multiple `.count` elements, don't assume the second one is deaths, verify it by parsing the label) 225 | 226 | #### Generating data retroactively 227 | 228 | If your data source has timeseries data, you can include its data in retroactive regeneration (for dates prior to this project's inception) by checking for `process.env['SCRAPE_DATE']`. This date is your target date; get it in whatever format you need, and only return results from your timeseries dataset for that date. See the JHU scraper for an example. 229 | 230 | #### What to do if a scraper breaks? 231 | 232 | Scrapers need to be able to operate correctly on old data, so updates to scrapers must be backwards compatible.
If you know the date the site broke, you can have two implementations (or more) of a scraper in the same function: 233 | 234 | ```javascript 235 | { 236 | state: 'LA', 237 | country: 'USA', 238 | scraper: async function() { 239 | let counties = []; 240 | if (datetime.scrapeDateIsBefore('2020-3-14')) { 241 | // Use the old table 242 | this.url = 'http://ldh.la.gov/Coronavirus/'; 243 | 244 | let $ = await fetch.page(this.url); 245 | 246 | let $trs = $('p:contains("Louisiana Cases")') 247 | .nextAll('table') 248 | .find('tbody > tr:not(:last-child)'); 249 | 250 | $trs.each((index, tr) => { 251 | counties.push(...); 252 | }); 253 | } 254 | else { 255 | // Use the new CSV file 256 | this.url = 'https://opendata.arcgis.com/datasets/cba425c2e5b8421c88827dc0ec8c663b_0.csv'; 257 | 258 | let data = await fetch.csv(this.url); 259 | 260 | for (let county of data) { 261 | counties.push(...); 262 | } 263 | } 264 | 265 | // Add state data 266 | counties.push(transform.sumData(counties)); 267 | 268 | return counties; 269 | } 270 | }, 271 | ``` 272 | 273 | As you can see, you can change `this.url` within your function (but be sure to set it every time). 274 | 275 | Another example: when the HTML on the page changes, you can simply change the selectors or Cheerio function calls: 276 | 277 | ```javascript 278 | let $table; 279 | if (datetime.scrapeDateIsBefore('2020-3-16')) { 280 | $table = $('table[summary="Texas COVID-19 Cases"]'); 281 | } 282 | else { 283 | $table = $('table[summary="COVID-19 Cases in Texas Counties"]'); 284 | } 285 | ``` 286 | 287 | You can also use `datetime.scrapeDateIsAfter()` for more complex customization. 288 | 289 | ### Criteria for sources 290 | 291 | Any source added to the scraper must meet the following criteria: 292 | 293 | #### 1. Sources must be government or health organizations 294 | 295 | No news articles, no aggregated sources. 296 | 297 | #### 2. Sources must provide the number of cases at a bare minimum 298 | 299 | Additional data is welcome. 300 | 301 | #### 3. Presumptive cases are considered confirmed 302 | 303 | In keeping with other datasets, presumptive cases should be considered part of the case total. 304 | 305 | ### Source rating 306 | 307 | Sources are rated based on: 308 | 309 | 1. **How hard is it to read?** - `csv` and `json` score best, with `table` right behind them, and `list` and `paragraph` worse. `pdf` gets no points, and `image` gets negative points. 310 | 2. **Timeseries?** - Sources score points if they provide a timeseries. 311 | 3. **Completeness** - Sources get points for having `cases`, `tested`, `deaths`, `recovered`, `country`, `state`, `county`, and `city`. 312 | 4. **SSL** - Sources get points for serving over SSL. 313 | 5. **Headless?** - Sources get docked points if they require a headless scraper. 314 | 315 | The maximum rating for a source is 1, the minimum is near 0. See [`lib/transform.calculateRating`](blob/master/lib/transform.js) for the exact algorithm. 316 | 317 | All data in the output includes the `url` and the `rating` of the source. 318 | 319 | ## SSL 320 | 321 | Some sources don't use standard SSL certificates, resulting in fetching errors. You can add additional 322 | SSL certificates in the `ssl` directory. They will automatically be used when fetching data. 323 | 324 | ## License 325 | 326 | This project is licensed under the permissive [BSD 2-clause license](LICENSE). 327 | 328 | The data produced by this project is public domain.
329 | 330 | ## Attribution 331 | 332 | Please cite this project if you use it in your visualization or reporting. 333 | 
-------------------------------------------------------------------------------- /site/index.html: --------------------------------------------------------------------------------
[The HTML markup of this page was lost in extraction; only its text content survived. The page is titled "Corona Data Scraper" and contains: the blurb "Corona Data Scraper pulls COVID-19 Coronavirus case data from verified sources, finds the corresponding GeoJSON features, and adds population data."; the notes "All sources are cited right in the same row as the data." and "Select a file on the left to view and download it."; and the call-outs "Can't quite visualize it?", "Missing data you've got a source for?" ("Contribute a scraper and your data will appear here tomorrow." / "Join the Slack and chat in #scraper-dev if you have any questions"), "Found a problem with the data?" ("File an issue on Github with complete details of the issue."), and "Use this data?" ("Cite this website and file an issue to have your project's URL added.").]
247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /site/index.js: -------------------------------------------------------------------------------- 1 | /* global document, XMLHttpRequest, window, history, mapboxgl, Handsontable, Papa, JSONFormatter, d3 */ 2 | /* eslint no-use-before-define: "off" */ 3 | /* eslint guard-for-in: "off" */ 4 | /* eslint no-new: "off" */ 5 | /* eslint no-restricted-globals: "off" */ 6 | /* eslint no-useless-escape: "off" */ 7 | 8 | const data = {}; 9 | let map; 10 | 11 | const noCasesColor = 'rgba(255, 255, 255, 0.5)'; 12 | const noPopulationDataColor = '#AAAAAA'; 13 | 14 | const outlineColorHighlight = 'rgb(0,0,0)'; 15 | const outlineColor = 'rgba(175, 175, 175, 0.5)'; 16 | 17 | const choroplethColors = { 18 | stoplight: ['#eeffcd', '#b4ffa5', '#ffff00', '#ff7f00', '#ff0000'], 19 | heat: ['#FFFFFF', '#ffff5e', '#ffe70c', '#fead0a', '#fd6f08', '#fd2907', '#fd0407'], 20 | peach: ['rgb(253,222,166)', 'rgb(255,188,134)', 'rgb(249,152,133)', 'rgb(232,110,129)', 'rgb(224,88,136)'], 21 | pink: ['rgb(255, 244, 221)', 'rgb(255, 221, 215)', 'rgb(255, 197, 210)', 'rgb(254, 174, 203)', 'rgb(250, 150, 196)', 'rgb(245, 126, 189)', 'rgb(239, 100, 181)', 'rgb(232, 70, 173)', 'rgb(210, 56, 161)', 'rgb(187, 46, 150)', 'rgb(163, 36, 140)', 'rgb(138, 27, 131)', 'rgb(113, 22, 124)', 'rgb(86, 15, 116)', 'rgb(55, 11, 110)', 'rgb(0, 9, 104)'], 22 | viridis: ['#fde725', '#d8e219', '#addc30', '#84d44b', '#5ec962', '#3fbc73', '#28ae80', '#1fa088', '#21918c', '#26828e', '#2c728e', '#33638d', '#3b528b', '#424086', '#472d7b', '#48186a'], 23 | magma: ['#fcfdbf', '#fde2a3', '#fec488', '#fea772', '#fc8961', '#f56b5c', '#e75263', '#d0416f', '#b73779', '#9c2e7f', '#832681', '#6a1c81', '#51127c', '#36106b', '#1d1147', '#0a0822'] 24 | }; 25 | 26 | const choroplethStyle = 'pureRatio'; 27 | 28 | const choroplethColor = 'stoplight'; 29 | 30 | const type = 'cases'; 31 | 32 | const choroplethStyles = { 33 | pureRatio(location, locationData, type, rank, totalRanked, worstAffectedPercent) { 34 | // Color based on how bad it is, relative to the worst place 35 | const affectedPercent = locationData[type] / location.population; 36 | const percentRatio = affectedPercent / worstAffectedPercent; 37 | 38 | return adjustTanh(percentRatio, 0.15, 2); 39 | }, 40 | rankAdjustedRatio(location, locationData, type, rank, totalRanked, worstAffectedPercent) { 41 | // Color based on rank 42 | const rankRatio = (totalRanked - rank) / totalRanked; 43 | 44 | // Color based on how bad it is, relative to the worst place 45 | const percentRatio = locationData[type] / location.population / worstAffectedPercent; 46 | 47 | const ratio = (rankRatio * 0.75 + percentRatio) / 1.75; 48 | 49 | return ratio; 50 | }, 51 | rankRatio(location, locationData, type, rank, totalRanked) { 52 | // Color based on rank 53 | const rankRatio = (totalRanked - rank) / totalRanked; 54 | 55 | return rankRatio; 56 | } 57 | }; 58 | 59 | function getRatio(fractional, total) { 60 | if (fractional === 0) { 61 | return '-'; 62 | } 63 | return `1 : ${Math.round(total / fractional).toLocaleString()}`; 64 | } 65 | 66 | // Via https://math.stackexchange.com/a/57510 67 | function adjustTanh(value, a = 0.1, b = 1.75) { 68 | return Math.min(Math.tanh(value + a) * b, 1); 69 | } 70 | 71 | function getLocationsByRank(currentData, type, min = 3) { 72 | let rankedItems = []; 73 | 74 | for (const locationId in currentData) { 75 | const locationData = currentData[locationId]; 76 | const location = 
data.locations[locationId]; 77 | 78 | if (location.population && locationData[type] >= min) { 79 | rankedItems.push({ locationId, rate: locationData[type] / location.population }); 80 | } 81 | } 82 | 83 | rankedItems = rankedItems.sort((a, b) => { 84 | if (a.rate === b.rate) { 85 | return 0; 86 | } 87 | if (a.rate > b.rate) { 88 | return -1; 89 | } 90 | 91 | return 1; 92 | }); 93 | 94 | const locations = []; 95 | for (const rankedItem of rankedItems) { 96 | locations.push(data.locations[rankedItem.locationId]); 97 | } 98 | 99 | return locations; 100 | } 101 | 102 | function getColorOnGradient(colors, position) { 103 | if (position === 1) { 104 | return colors[colors.length - 1]; 105 | } 106 | if (position === 0) { 107 | return colors[0]; 108 | } 109 | 110 | const index = Math.floor(position * (colors.length - 1)); 111 | const startColor = colors[index]; 112 | const endColor = colors[index + 1]; 113 | const alpha = position * (colors.length - 1) - index; 114 | return d3.interpolateRgb(startColor, endColor)(alpha); 115 | } 116 | 117 | function populateMap() { 118 | const currentDate = Object.keys(data.timeseries).pop(); 119 | const currentData = data.timeseries[currentDate]; 120 | 121 | const locationsByRank = getLocationsByRank(currentData, type, 1); 122 | 123 | let foundFeatures = 0; 124 | let worstAffectedPercent = 0; 125 | data.locations.forEach(function(location, index) { 126 | // Calculate worst affected percent 127 | if (location.population) { 128 | const locationData = currentData[index]; 129 | if (locationData) { 130 | const infectionPercent = locationData.cases / location.population; 131 | if (infectionPercent > worstAffectedPercent) { 132 | worstAffectedPercent = infectionPercent; 133 | } 134 | } 135 | } 136 | // Associated the feature with the location 137 | if (location.featureId) { 138 | const feature = data.features.features[location.featureId]; 139 | if (feature) { 140 | foundFeatures++; 141 | feature.properties.locationId = index; 142 | } 143 | } 144 | }); 145 | 146 | data.features.features.forEach(function(feature, index) { 147 | feature.id = index; 148 | let color = null; 149 | const { locationId } = feature.properties; 150 | const location = data.locations[locationId]; 151 | if (location && location.population) { 152 | const locationData = currentData[locationId]; 153 | if (locationData) { 154 | if (locationData.cases === 0) { 155 | color = noCasesColor; 156 | } else { 157 | const rank = locationsByRank.indexOf(location); 158 | const scaledColorValue = choroplethStyles[choroplethStyle](location, locationData, type, rank, locationsByRank.length, worstAffectedPercent); 159 | color = getColorOnGradient(choroplethColors[choroplethColor], scaledColorValue); 160 | } 161 | } 162 | } 163 | 164 | feature.properties.color = color || noPopulationDataColor; 165 | }); 166 | 167 | console.log('Found locations for %d of %d features', foundFeatures, data.features.features.length); 168 | 169 | function isCountry(location) { 170 | return location && location.country && !location.state && !location.county && !location.city; 171 | } 172 | 173 | function isState(location) { 174 | return location && location.state && !location.county && !location.city; 175 | } 176 | 177 | function isCounty(location) { 178 | return location && location.county && !location.city; 179 | } 180 | 181 | function isCity(location) { 182 | return location && location.city; 183 | } 184 | 185 | function getLocationGranularityName(location) { 186 | if (isCountry(location)) { 187 | return 'country'; 188 | } 189 | if 
(isState(location)) { 190 | return 'state'; 191 | } 192 | if (isCounty(location)) { 193 | return 'county'; 194 | } 195 | if (isCity(location)) { 196 | return 'city'; 197 | } 198 | return 'none'; 199 | } 200 | 201 | const countryFeatures = { 202 | type: 'FeatureCollection', 203 | features: data.features.features.filter(function(feature) { 204 | return isCountry(data.locations[feature.properties.locationId]); 205 | }) 206 | }; 207 | 208 | const stateFeatures = { 209 | type: 'FeatureCollection', 210 | features: data.features.features.filter(function(feature) { 211 | return isState(data.locations[feature.properties.locationId]); 212 | }) 213 | }; 214 | 215 | const countyFeatures = { 216 | type: 'FeatureCollection', 217 | features: data.features.features.filter(function(feature) { 218 | return isCounty(data.locations[feature.properties.locationId]); 219 | }) 220 | }; 221 | 222 | const paintConfig = { 223 | // 'fill-outline-color': 'rgba(255, 255, 255, 1)', 224 | 'fill-color': ['get', 'color'], 225 | 'fill-outline-color': ['case', ['boolean', ['feature-state', 'hover'], false], outlineColorHighlight, outlineColor], 226 | 'fill-opacity': 1 227 | }; 228 | 229 | map.addSource('CDS-country', { 230 | type: 'geojson', 231 | data: countryFeatures 232 | }); 233 | 234 | map.addLayer({ 235 | id: 'CDS-country', 236 | type: 'fill', 237 | source: 'CDS-country', 238 | layout: {}, 239 | paint: paintConfig 240 | }); 241 | 242 | map.addSource('CDS-state', { 243 | type: 'geojson', 244 | data: stateFeatures 245 | }); 246 | 247 | map.addLayer({ 248 | id: 'CDS-state', 249 | type: 'fill', 250 | source: 'CDS-state', 251 | layout: {}, 252 | paint: paintConfig 253 | }); 254 | 255 | map.addSource('CDS-county', { 256 | type: 'geojson', 257 | data: countyFeatures 258 | }); 259 | 260 | map.addLayer({ 261 | id: 'CDS-county', 262 | type: 'fill', 263 | source: 'CDS-county', 264 | layout: {}, 265 | paint: paintConfig 266 | }); 267 | 268 | // Create a popup, but don't add it to the map yet. 269 | const popup = new mapboxgl.Popup({ 270 | closeButton: false, 271 | closeOnClick: false 272 | }); 273 | 274 | let hoveredFeatureId = null; 275 | let hoveredFeatureSource = null; 276 | 277 | function handleMouseLeave() { 278 | map.getCanvas().style.cursor = ''; 279 | popup.remove(); 280 | if (hoveredFeatureId) { 281 | map.setFeatureState({ source: 'CDS-state', id: hoveredFeatureId }, { hover: false }); 282 | } 283 | } 284 | 285 | function handleMouseMove(e) { 286 | if (e.features.length > 0) { 287 | e.preventDefault(); 288 | const feature = e.features[0]; 289 | 290 | const { locationId } = feature.properties || {}; 291 | const location = data.locations[locationId] || {}; 292 | const locationData = currentData[locationId] || {}; 293 | 294 | if (hoveredFeatureId) { 295 | map.setFeatureState({ source: hoveredFeatureSource, id: hoveredFeatureId }, { hover: false }); 296 | } 297 | 298 | hoveredFeatureId = feature.id; 299 | hoveredFeatureSource = `CDS-${getLocationGranularityName(location)}`; 300 | 301 | if (hoveredFeatureId) { 302 | map.setFeatureState({ source: hoveredFeatureSource, id: hoveredFeatureId }, { hover: true }); 303 | } 304 | 305 | // Change the cursor style as a UI indicator. 306 | map.getCanvas().style.cursor = 'pointer'; 307 | 308 | // Populate the popup and set its coordinates 309 | // based on the feature found. 
310 | popup 311 | .setLngLat(e.lngLat) 312 | .setHTML(popupTemplate(location, locationData, feature)) 313 | .addTo(map); 314 | } 315 | } 316 | 317 | // When the user moves their mouse over the state-fill layer, we'll update the 318 | // feature state for the feature under the mouse. 319 | map.on('mousemove', 'CDS-country', handleMouseMove); 320 | map.on('mousemove', 'CDS-state', handleMouseMove); 321 | map.on('mousemove', 'CDS-county', handleMouseMove); 322 | 323 | // When the mouse leaves the state-fill layer, update the feature state of the 324 | // previously hovered feature. 325 | map.on('mouseleave', 'CDS-country', handleMouseLeave); 326 | map.on('mouseleave', 'CDS-state', handleMouseLeave); 327 | map.on('mouseleave', 'CDS-county', handleMouseLeave); 328 | } 329 | 330 | function popupTemplate(location, locationData) { 331 | let htmlString = `
<div class="cds-Popup">`;
332 | // NOTE: the markup below is reconstructed; the original tags were lost in extraction
333 | htmlString += `<h6>${location.name}</h6>`;
334 | htmlString += `<table>`;
335 | if (location.population !== undefined) {
336 | htmlString += `<tr><th>Population:</th><td>${location.population.toLocaleString()}</td></tr>`;
337 | } else {
338 | htmlString += `<tr><th colspan="2">NO POPULATION DATA</th></tr>`;
339 | }
340 | if (location.population && locationData.cases) {
341 | htmlString += `<tr><th>Infected:</th><td>${getRatio(locationData.cases, location.population)}</td></tr>`;
342 | }
343 | if (locationData.cases !== undefined) {
344 | htmlString += `<tr><th>Cases:</th><td>${locationData.cases.toLocaleString()}</td></tr>`;
345 | }
346 | if (locationData.deaths !== undefined) {
347 | htmlString += `<tr><th>Deaths:</th><td>${locationData.deaths.toLocaleString()}</td></tr>`;
348 | }
349 | if (locationData.recovered !== undefined) {
350 | htmlString += `<tr><th>Recovered:</th><td>${locationData.recovered.toLocaleString()}</td></tr>`;
351 | }
352 | if (locationData.active !== locationData.cases) {
353 | htmlString += `<tr><th>Active:</th><td>${locationData.active.toLocaleString()}</td></tr>`;
354 | }
355 | htmlString += `</table></div>
`; 356 | return htmlString; 357 | } 358 | 359 | function showMap() { 360 | mapboxgl.accessToken = 'pk.eyJ1IjoibGF6ZCIsImEiOiJjazd3a3VoOG4wM2RhM29rYnF1MDJ2NnZrIn0.uPYVImW8AVA71unqE8D8Nw'; 361 | map = new mapboxgl.Map({ 362 | container: 'map', 363 | style: 'mapbox://styles/lazd/ck7wkzrxt0c071ip932rwdkzj', 364 | center: [-121.403732, 40.492392], 365 | zoom: 3 366 | }); 367 | 368 | let remaining = 0; 369 | function loadData(url, field, callback) { 370 | remaining++; 371 | fetchJSON(url, function(obj) { 372 | data[field] = obj; 373 | if (typeof callback === 'function') { 374 | callback(obj); 375 | } 376 | handleLoaded(); 377 | }); 378 | } 379 | 380 | function handleLoaded() { 381 | remaining--; 382 | if (remaining === 0) { 383 | if (map.loaded()) { 384 | populateMap(); 385 | } else { 386 | map.once('load', populateMap); 387 | } 388 | } 389 | } 390 | 391 | loadData('locations.json', 'locations'); 392 | loadData('timeseries.json', 'timeseries'); 393 | loadData('features.json', 'features'); 394 | } 395 | 396 | document.addEventListener('DOMContentLoaded', function() { 397 | const sidebar = document.querySelector('.spectrum-Site-sideBar'); 398 | const overlay = document.querySelector('.spectrum-Site-overlay'); 399 | 400 | const pages = { 401 | '#home': '.cds-Home', 402 | '#editor': '.cds-FileEditor', 403 | '#sources': '.cds-Sources', 404 | '#features.json': '.cds-Map' 405 | }; 406 | 407 | const routes = { 408 | '#sources': showSources, 409 | '#home': function() {}, 410 | '#features.json': showMap 411 | }; 412 | 413 | function openSidebar() { 414 | sidebar.classList.add('is-open'); 415 | overlay.classList.add('is-open'); 416 | } 417 | 418 | function closeSidebar() { 419 | sidebar.classList.remove('is-open'); 420 | overlay.classList.remove('is-open'); 421 | } 422 | 423 | // Todo make this site less crappy and don't copy paste this 424 | function getName(location) { 425 | let name = ''; 426 | let sep = ''; 427 | if (location.city) { 428 | name += location.city; 429 | sep = ', '; 430 | } 431 | if (location.county) { 432 | name += sep + location.county; 433 | sep = ', '; 434 | } 435 | if (location.state) { 436 | name += sep + location.state; 437 | sep = ', '; 438 | } 439 | if (location.country) { 440 | name += sep + location.country; 441 | sep = ', '; 442 | } 443 | return name; 444 | } 445 | 446 | function getGrade(rating) { 447 | rating *= 200; 448 | 449 | if (rating >= 97) { 450 | return 'A+'; 451 | } 452 | if (rating >= 93) { 453 | return 'A'; 454 | } 455 | if (rating >= 90) { 456 | return 'A-'; 457 | } 458 | if (rating >= 87) { 459 | return 'B+'; 460 | } 461 | if (rating >= 83) { 462 | return 'B'; 463 | } 464 | if (rating >= 80) { 465 | return 'B-'; 466 | } 467 | if (rating >= 77) { 468 | return 'C+'; 469 | } 470 | if (rating >= 73) { 471 | return 'C'; 472 | } 473 | if (rating >= 70) { 474 | return 'C-'; 475 | } 476 | if (rating >= 67) { 477 | return 'D+'; 478 | } 479 | if (rating >= 63) { 480 | return 'D'; 481 | } 482 | if (rating >= 60) { 483 | return 'D'; 484 | } 485 | if (rating >= 57) { 486 | return 'F+'; 487 | } 488 | if (rating >= 53) { 489 | return 'F'; 490 | } 491 | if (rating >= 50) { 492 | return 'F'; 493 | } 494 | return 'F-'; 495 | } 496 | 497 | function ratingTemplate(source, index) { 498 | const typeIcons = { 499 | json: '✅', 500 | csv: '✅', 501 | table: '⚠️', 502 | list: '❌', 503 | paragraph: '🤮' 504 | }; 505 | const typeNames = { 506 | json: 'JSON', 507 | csv: 'CSV' 508 | }; 509 | 510 | let granular = source.city || source.county; 511 | let granularity = 'country-level'; 512 | if 
(source.city || source.aggregate === 'city') { 513 | granularity = 'city-level'; 514 | granular = true; 515 | } else if (source.county || source.aggregate === 'county') { 516 | granularity = 'county-level'; 517 | granular = true; 518 | } else if (source.state || source.aggregate === 'state') { 519 | granularity = 'state-level'; 520 | } 521 | 522 | const sourceName = source.url.match(/^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/?\n]+)/)[1]; 523 | const slug = `sources:${getName(source) 524 | .replace(/,/g, '-') 525 | .replace(/\s/g, '')}`; 526 | 527 | return ` 528 |
<li id="${slug}" class="cds-Rating">
529 | <!-- NOTE: the markup below is reconstructed; the original tags were lost in extraction -->
530 | <div class="cds-Rating-grade">${getGrade(source.rating).replace(/([\+\-])/, '$1')}</div>
531 | 
532 | <h2 class="cds-Rating-name">${index + 1}. ${getName(source)}</h2>
533 | 
534 | <a href="${source.url}" class="cds-Rating-url" target="_blank">${sourceName}</a>
535 | 
536 | <ul class="cds-Rating-details">
537 | <li>${typeIcons[source.type]} ${typeNames[source.type] || source.type.substr(0, 1).toUpperCase() + source.type.substr(1)}</li>
538 | 
539 | <li>${source.timeseries ? '✅' : '❌'} Timeseries</li>
540 | 
541 | <li>${source.aggregate ? '✅' : '❌'} Aggregate</li>
542 | 
543 | <li>${source.ssl ? '✅' : '❌'} SSL</li>
544 | 
545 | <li>${source.headless ? '❌' : '✅'} ${source.headless ? 'Requires' : ' Does not require'} JavaScript</li>
546 | 
547 | <li>${granular ? '✅' : '❌'} Granularity (${granularity})</li>
548 | 
549 | </ul>
550 | 
551 | 
552 | 
553 | 
554 | </li>
  • 555 | `; 556 | } 557 | 558 | function showSources() { 559 | const list = document.querySelector('.cds-Sources-list'); 560 | fetchJSON('ratings.json', function(ratings) { 561 | list.innerHTML = ''; 562 | for (let i = 0; i < ratings.length; i++) { 563 | list.insertAdjacentHTML('beforeend', ratingTemplate(ratings[i], i)); 564 | } 565 | if (window.location.hash.indexOf(':') !== -1) { 566 | document.getElementById(window.location.hash.substr(1)).scrollIntoView({ 567 | behavior: 'smooth', 568 | block: 'start' 569 | }); 570 | } 571 | }); 572 | } 573 | 574 | let currentPage = null; 575 | function showPage(pageToShow, noPush) { 576 | // Set selected 577 | const currentSideLink = document.querySelector(`.spectrum-SideNav-item a[href="${pageToShow}"]`) || document.querySelector(`.spectrum-SideNav-item a[href="${pageToShow.replace('#', '')}"]`); 578 | const currentSideItem = currentSideLink && currentSideLink.closest('.spectrum-SideNav-item'); 579 | const otherSideItem = document.querySelector('.spectrum-SideNav-item.is-selected'); 580 | if (otherSideItem) { 581 | otherSideItem.classList.remove('is-selected'); 582 | } 583 | if (currentSideItem) { 584 | currentSideItem.classList.add('is-selected'); 585 | } 586 | 587 | for (const page in pages) { 588 | const selector = pages[page]; 589 | if (page === pageToShow) { 590 | document.querySelector(selector).hidden = false; 591 | } else { 592 | document.querySelector(selector).hidden = true; 593 | } 594 | } 595 | 596 | if (routes[pageToShow]) { 597 | if (!noPush) { 598 | history.pushState(null, '', pageToShow); 599 | } 600 | routes[pageToShow](); 601 | } 602 | 603 | currentPage = pageToShow; 604 | 605 | closeSidebar(); 606 | } 607 | 608 | function loadFile(url, dataLevels, noPush) { 609 | document.body.classList.add('is-editing'); 610 | 611 | const editor = document.querySelector('.cds-FileEditor'); 612 | 613 | fetchURL(url, function() { 614 | editor.querySelector('.cds-Heading').innerText = url; 615 | 616 | const extension = url.split('.').pop(); 617 | 618 | showPage('#editor'); 619 | editor.querySelector('.cds-Editor-download').href = url; 620 | if (extension === 'json') { 621 | let obj; 622 | try { 623 | obj = JSON.parse(this.responseText); 624 | } catch (error) { 625 | editor.querySelector('.cds-FileEditor-content').innerHTML = `
    Failed to load ${url}: ${error}
    `; 626 | return; 627 | } 628 | const formatter = new JSONFormatter(obj, dataLevels || 1); 629 | 630 | editor.querySelector('.cds-Editor-content').innerHTML = '<div>
    '; 631 | editor.querySelector('.cds-Editor-content').firstElementChild.appendChild(formatter.render()); 632 | } else { 633 | const parsedData = Papa.parse(this.responseText, { 634 | header: true, 635 | skipEmptyLines: true 636 | }); 637 | 638 | editor.querySelector('.cds-Editor-content').innerHTML = ''; 639 | new Handsontable(editor.querySelector('.cds-Editor-content'), { 640 | data: parsedData.data, 641 | rowHeaders: true, 642 | colHeaders: parsedData.meta.fields, 643 | columnSorting: true, 644 | licenseKey: 'non-commercial-and-evaluation' 645 | }); 646 | } 647 | 648 | // Select menu item 649 | const previousItem = editor.querySelector('.spectrum-SideNav-item.is-selected'); 650 | if (previousItem) { 651 | previousItem.classList.remove('is-selected'); 652 | } 653 | 654 | document 655 | .querySelector(`a[href="${url}"]`) 656 | .closest('.spectrum-SideNav-item') 657 | .classList.add('is-selected'); 658 | }); 659 | 660 | if (!noPush) { 661 | history.pushState(null, '', `#${url}`, ''); 662 | } 663 | } 664 | 665 | function getHashStart() { 666 | return window.location.hash.split(':')[0]; 667 | } 668 | 669 | function handleHashChange() { 670 | if (window.location.hash) { 671 | if (routes[getHashStart()]) { 672 | if (currentPage !== getHashStart()) { 673 | showPage(getHashStart(), true); 674 | } 675 | } else if (window.location.hash.match('.csv') || window.location.hash.match('.json')) { 676 | loadFile(window.location.hash.substr(1), null, true); 677 | } 678 | } else { 679 | showPage('#home', false); 680 | } 681 | } 682 | 683 | window.addEventListener('hashchange', handleHashChange, false); 684 | 685 | document.addEventListener('click', function(evt) { 686 | const button = evt.target.closest('button'); 687 | if (button && button.classList.contains('js-toggleMenu')) { 688 | openSidebar(); 689 | } 690 | 691 | if (evt.target.closest('.spectrum-Site-overlay')) { 692 | closeSidebar(); 693 | } 694 | }); 695 | 696 | document.addEventListener('click', function(evt) { 697 | const target = evt.target.closest('a'); 698 | if (target) { 699 | if (target.tagName === 'A' && target.hasAttribute('download') && !target.hasAttribute('data-noview')) { 700 | // Stop download 701 | evt.preventDefault(); 702 | 703 | const url = target.getAttribute('href'); 704 | if (url === 'features.json') { 705 | showPage('#features.json'); 706 | } else { 707 | loadFile(url, target.getAttribute('data-levels')); 708 | } 709 | } else if (target.tagName === 'A' && routes[target.getAttribute('href')]) { 710 | // Stop download 711 | evt.preventDefault(); 712 | 713 | showPage(target.getAttribute('href')); 714 | } 715 | } 716 | }); 717 | 718 | // Init 719 | handleHashChange(); 720 | }); 721 | 722 | // / Duplicated stuff because we don't have a bundler 723 | 724 | function fetchURL(url, callback) { 725 | const req = new XMLHttpRequest(); 726 | req.addEventListener('load', callback); 727 | req.open('GET', url); 728 | req.send(); 729 | return req; 730 | } 731 | 732 | function fetchJSON(url, callback) { 733 | return fetchURL(url, function() { 734 | let obj; 735 | try { 736 | obj = JSON.parse(this.responseText); 737 | } catch (err) { 738 | console.error('Failed to parse JSON from %s: %s', url, err); 739 | } 740 | callback(obj); 741 | }); 742 | } 743 | --------------------------------------------------------------------------------