├── CHANGELOG.md ├── requirements.txt ├── .github ├── linters │ └── .markdown-lint.yml └── workflows │ └── javascript.yml ├── projects ├── win10_apps.md ├── browser_extensions.md ├── analysis.md ├── bots.md ├── mobile_apps.md └── miscellaneous.md ├── documentation ├── index.md ├── districtdaily.md ├── timeseries.min.md ├── statedaily.md ├── statedistrictwise.md ├── statetestdata.md ├── v4_data.md ├── rawdata.md └── csv │ └── index.md ├── src ├── sanity_check.js ├── generate_locale.js ├── obsolete │ ├── concat_data.js_bak │ ├── raw_sheet_to_json.js_bak │ ├── snapshot_zones.js_bak │ ├── generate_districts_daily.js_bak │ ├── split_raw_data.js_bak │ ├── sheet-to-json.js_bak │ ├── raw_data-to-state_district_wise_data.js_bak │ ├── ultimate_parser.js_bak │ └── geocoder.py_bak ├── states_daily_to_csv.js ├── district_data_generator.js ├── generate_activity_log.js ├── sheet-to-json_generic.js ├── sheets-to-csv.js ├── build_raw_data.py └── parser_v3.py ├── .gitignore ├── package.json ├── LICENSE_CODE ├── lib ├── index.js ├── notify_tg.js └── constants.js ├── main.sh ├── README.md └── LICENSE_DATA /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | =========== 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mapbox==0.18.0 2 | urllib3==1.25.9 3 | pandas==1.0.5 4 | -------------------------------------------------------------------------------- /.github/linters/.markdown-lint.yml: -------------------------------------------------------------------------------- 1 | comment: Relaxed rules 2 | default: true 3 | line_length: false 4 | -------------------------------------------------------------------------------- /projects/win10_apps.md: -------------------------------------------------------------------------------- 1 | # Windows 10 apps using this API 2 | 3 | - [NCovid19 for Windows 10](https://www.microsoft.com/store/apps/9N4KZMGF4JRS) 4 | - By: Rohan Mahindrakar [@rohan12](https://github.com/ROHAN12) 5 | -------------------------------------------------------------------------------- /documentation/index.md: -------------------------------------------------------------------------------- 1 | # COVID19-India API Index 2 | 3 | ## A detailed documentation of all the APIs. 
The logic extends to CSV files wherever the corresponding CSV endpoints are provided.
 4 | 
 5 | - [raw_data{n}.json](rawdata.md)
 6 | - [v4/min/data.min.json](v4_data.md)
 7 | - [v4/min/timeseries.min.json](timeseries.min.md)
 8 | 
--------------------------------------------------------------------------------
/projects/browser_extensions.md:
--------------------------------------------------------------------------------
 1 | # Browser extensions using this API
 2 | 
 3 | ## _Both Chrome and Firefox:_
 4 | 
 5 | - [Covid-19 Tracker](https://coronatrends.live) (by [@akanshgulati](https://github.com/akanshgulati))
 6 | 
 7 | ## _Firefox Extension:_
 8 | 
 9 | - [CovidTrack India](https://addons.mozilla.org/en-US/firefox/addon/covidtrack-india/) (by [@mdb571](https://github.com/mdb571))
10 | 
--------------------------------------------------------------------------------
/src/sanity_check.js:
--------------------------------------------------------------------------------
 1 | const data = require('../tmp/data.json')
 2 | 
 3 | var total = data.statewise[0]
 4 | var err = false
 5 | if (total.deltaconfirmed > 425000) {
 6 |   console.error('Delta confirmed is greater than the limit. Please verify')
 7 |   err = true
 8 | }
 9 | 
10 | if (err) {
11 |   console.error('Sanity check failed. Not committing!')
12 |   process.exit(1)
13 | } else {
14 |   console.log('No known data errors. Proceeding to commit!')
15 | }
16 | 
--------------------------------------------------------------------------------
/src/generate_locale.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | const locales = require('../tmp/locales.json')
 3 | 
 4 | var dir = './tmp/locales/'
 5 | if (!fs.existsSync(dir)) {
 6 |   fs.mkdirSync(dir, { recursive: true })
 7 | }
 8 | 
 9 | console.log(locales.locales[0])
10 | var allFiles = {}
11 | 
12 | for (var key in locales.locales[0]) {
13 |   allFiles[key] = {}
14 | }
15 | 
16 | locales.locales.forEach(element => {
17 |   for (var key in element) {
18 |     allFiles[key][element.english] = element[key]
19 |   }
20 | })
21 | 
22 | for (key in allFiles) {
23 |   fs.writeFileSync('./tmp/locales/locale_' + key + '.json', JSON.stringify(allFiles[key], null, 2))
24 | }
25 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Build and Release Folders
 2 | bin-debug/
 3 | bin-release/
 4 | [Oo]bj/
 5 | [Bb]in/
 6 | 
 7 | # Other files and folders
 8 | .settings/
 9 | 
10 | # Executables
11 | *.swf
12 | *.air
13 | *.ipa
14 | *.apk
15 | 
16 | # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties`
17 | # should NOT be excluded as they contain compiler settings and other important
18 | # information for Eclipse / Flash Builder.
19 | # 20 | # 21 | 22 | # Logs 23 | logs 24 | *.log 25 | npm-debug.log* 26 | yarn-debug.log* 27 | yarn-error.log* 28 | 29 | # Dependency directories 30 | node_modules/ 31 | jspm_packages/ 32 | 33 | venv/ 34 | tmp/ 35 | .vscode 36 | 37 | # Vim 38 | *.swp 39 | *.swo 40 | 41 | # Debug script 42 | fetch.sh 43 | -------------------------------------------------------------------------------- /src/obsolete/concat_data.js_bak: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const raw_data1 = require('../tmp/raw_data1.json') 3 | const raw_data2 = require('../tmp/raw_data2.json') 4 | const deaths_recoveries1 = require('../tmp/deaths_recoveries1.json') 5 | const deaths_recoveries2 = require('../tmp/deaths_recoveries2.json') 6 | 7 | raw_data2.raw_data = raw_data1.raw_data.concat(raw_data2.raw_data) 8 | deaths_recoveries2.deaths_recoveries = deaths_recoveries1.deaths_recoveries.concat(deaths_recoveries2.deaths_recoveries) 9 | 10 | // raw_array = raw_array.filter(item => !item.detectedstate); 11 | 12 | fs.writeFileSync('./tmp/raw_data.json', JSON.stringify(raw_data2, null, 2)) 13 | fs.writeFileSync('./tmp/deaths_recoveries.json', JSON.stringify(deaths_recoveries2, null, 2)) 14 | -------------------------------------------------------------------------------- /src/obsolete/raw_sheet_to_json.js_bak: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const fetch = require('node-fetch') 3 | 4 | sheet_id = 'ovd0hzm' 5 | const url = 'https://spreadsheets.google.com/feeds/cells/1nzXUdaIWC84QipdVGUKTiCSc5xntBbpMpzLm6Si33zk/' + sheet_id + '/public/values?alt=json' 6 | 7 | const settings = { method: 'Get' } 8 | fetch(url, settings) 9 | .then(res => res.json()) 10 | .then((json) => { 11 | console.log(json.feed.updated.$t) 12 | 13 | // validating json file 14 | function IsValidJSONString (str) { 15 | try { 16 | JSON.parse(str) 17 | } catch (e) { 18 | return false 19 | } 20 | return true 21 | } 22 | 23 | latest = JSON.stringify(json, null, '\t') 24 | fs.writeFileSync('state_wise_raw.json', latest) 25 | console.log('completed the op!') 26 | }) 27 | -------------------------------------------------------------------------------- /src/obsolete/snapshot_zones.js_bak: -------------------------------------------------------------------------------- 1 | const zones = require('../tmp/zones.json') 2 | const fs = require('fs') 3 | const moment = require('moment-timezone') 4 | 5 | var dir = './tmp/zones_daily/' 6 | if (!fs.existsSync(dir)) { 7 | fs.mkdirSync(dir, { recursive: true }) 8 | } 9 | 10 | const now = moment().unix() 11 | var date = moment.unix(now) 12 | formated_date = date.tz('Asia/Kolkata').format('YYYY-MM-DD') 13 | 14 | var today_dir = dir + formated_date 15 | var latest_dir = dir + 'latest' 16 | if (!fs.existsSync(today_dir)) { 17 | fs.mkdirSync(today_dir) 18 | } 19 | if (!fs.existsSync(latest_dir)) { 20 | fs.mkdirSync(latest_dir) 21 | } 22 | var zones_string = JSON.stringify(zones, null, 2) 23 | fs.writeFileSync(today_dir + '/zones.json', zones_string) 24 | fs.writeFileSync(latest_dir + '/zones.json', zones_string) 25 | -------------------------------------------------------------------------------- /projects/analysis.md: -------------------------------------------------------------------------------- 1 | # Analysis projects using this API 2 | 3 | - [Data Analysis : India - District & 
State](https://docs.google.com/spreadsheets/d/1yS8x7IrlWLdtgM6UUPm2YN8lLRmRYJKm-4Wax-8EBuI/edit?usp=sharing) 4 | - By: [@Ankan_Plotter](https://t.me/Ankan_Plotter) 5 | 6 | --- 7 | 8 | - [Stats and viz in Google Data Studio](https://tinyurl.com/covid19indiadashboard) 9 | - By: [@veeyeskay](https://t.me/veeyeskay) 10 | 11 | --- 12 | 13 | - [R-naught stats for India](https://www.rt-india.live/) 14 | - By: [@rohit](https://t.me/rohitxsh) 15 | 16 | --- 17 | 18 | - [Covid19 India Predictions and Stats](https://ncov19stats.herokuapp.com) 19 | - By: [Naveen](https://www.github.com/naveensaigit) 20 | 21 | --- 22 | 23 | - [Covid 19 stats and Reproduction value (Rt)](https://www.rtindia.org) 24 | - By: [Laksh](http://github.com/lakshmatai), Yash and [Nilesh](https://github.com/nilesh304) 25 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "api", 3 | "version": "1.0.0", 4 | "description": "fetch and push", 5 | "main": "app.js", 6 | "directories": { 7 | "lib": "lib", 8 | "test": "test" 9 | }, 10 | "scripts": { 11 | "test": "echo \"Error: no test specified\" && exit 1" 12 | }, 13 | "repository": { 14 | "type": "git", 15 | "url": "git+https://github.com/covid19india/api.git" 16 | }, 17 | "keywords": [ 18 | "covid19india.org" 19 | ], 20 | "author": "Jun", 21 | "license": "GPL-3.0", 22 | "bugs": { 23 | "url": "https://github.com/covid19india/api/issues" 24 | }, 25 | "homepage": "https://github.com/covid19india/api#readme", 26 | "dependencies": { 27 | "date-fns": "^2.13.0", 28 | "drive-db": "^6.0.0", 29 | "immer": "^8.0.1", 30 | "moment": "^2.24.0", 31 | "moment-timezone": "^0.5.28", 32 | "node-fetch": "^2.6.1" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/obsolete/generate_districts_daily.js_bak: -------------------------------------------------------------------------------- 1 | console.log('generate_district_daily start') 2 | const fs = require('fs') 3 | 4 | const testFolder = './tmp/districts_daily/' 5 | 6 | files = fs.readdirSync(testFolder) 7 | 8 | data = { districtsDaily: {} } 9 | files.forEach(file => { 10 | if (!file.startsWith('202')) { return } 11 | 12 | var jsonData = JSON.parse(fs.readFileSync(testFolder + file + '/state_district_wise.json', 'utf8')) 13 | for (state in jsonData) { 14 | if (!data.districtsDaily[state]) { 15 | data.districtsDaily[state] = {} 16 | } 17 | for (district in jsonData[state].districtData) { 18 | if (!data.districtsDaily[state][district]) { 19 | data.districtsDaily[state][district] = [] 20 | } 21 | dist = jsonData[state].districtData[district] 22 | delete dist.delta 23 | dist.date = file 24 | data.districtsDaily[state][district].push(dist) 25 | } 26 | } 27 | }) 28 | 29 | fs.writeFileSync('./tmp/districts_daily.json', JSON.stringify(data, null, 2)) 30 | console.log('generate_district_daily end') 31 | -------------------------------------------------------------------------------- /LICENSE_CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 covid19india 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/documentation/districtdaily.md:
--------------------------------------------------------------------------------
 1 | # districts_daily.json
 2 | 
 3 | ## Description
 4 | 
 5 | This API provides a cumulative count for each district across the categories of C/R/D for each date.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | {
11 |   "districtsDaily": {
12 |     "StateName": {
13 |       "DistrictName": [
14 |         {
15 |           "active": totalActiveCount,
16 |           "confirmed": totalConfirmedCount,
17 |           "deceased": totalDeceasedCount,
18 |           "recovered": totalRecoveredCount,
19 |           "date": "yyyy-mm-dd"
20 |         },
21 |         .
22 |         .
23 |         .
24 |       ],
25 |     },
26 |     .
27 |     .
28 |     .
29 |   }
30 | }
31 | ```
32 | 
33 | - This API provides an array of objects for each district under each state, where each object corresponds to a specific date.
34 | - DistrictName: This is the name of the district.
35 | - active/confirmed/deceased/recovered : These are the cumulative values for that district up to that date.
36 | - The logic of active is: Active = Confirmed - Recovered - Deceased - Migrated Others
37 | 
38 | ## Usage and Caveats
39 | 
40 | - The objects do not provide Migrated Others values.
41 | 
--------------------------------------------------------------------------------
/documentation/timeseries.min.md:
--------------------------------------------------------------------------------
 1 | # v4/timeseries.min.json
 2 | 
 3 | ## Description
 4 | 
 5 | This is a timeseries API that provides state-level data.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | {
11 |   "StateCode": {
12 |     "dates": {
13 |       "YYYY-MM-DD": {
14 |         "delta": {
15 |           "confirmed": <confirmedDelta>,
16 |           "recovered": <recoveredDelta>,
17 |           "deceased": <deceasedDelta>,
18 |           "tested": <testedDelta>,
19 |           "vaccinated": <vaccinatedDelta>
20 |         },
21 |         "delta7": {
22 |           "confirmed": <7DmaConfirmedDelta>,
23 |           "recovered": <7DmaRecoveredDelta>,
24 |           "deceased": <7DmaDeceasedDelta>,
25 |           "tested": <7DmaTestedDelta>,
26 |           "vaccinated": <7DmaVaccinatedDelta>
27 |         },
28 |         "total": {
29 |           "confirmed": <totalConfirmed>,
30 |           "recovered": <totalRecovered>,
31 |           "deceased": <totalDeceased>,
32 |           "tested": <totalTested>,
33 |           "vaccinated": <totalVaccinated>
34 |         }
35 |       },
36 |       .
37 |       .
38 |       .
39 |     }
40 |   },
41 |   .
42 |   .
43 |   .
44 | }
45 | ```
46 | 
47 | - 7DMA is the seven-day moving average for that state.
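
A minimal consumption sketch in Node, using the node-fetch dependency this repo already declares. The base URL below is illustrative (point it at wherever the JSON is actually served), the state code `KL` is just an example, and the guards around missing keys are an assumption for robustness:

```js
const fetch = require('node-fetch')

// Illustrative URL: substitute the actual host serving timeseries.min.json.
const URL = 'https://api.covid19india.org/v4/min/timeseries.min.json'

fetch(URL)
  .then(res => res.json())
  .then(timeseries => {
    // Walk the per-date entries for one state (KL used as an example code)
    // and print the daily confirmed delta next to its 7DMA.
    const dates = timeseries.KL.dates
    for (const [date, entry] of Object.entries(dates)) {
      const delta = (entry.delta && entry.delta.confirmed) || 0
      const delta7 = (entry.delta7 && entry.delta7.confirmed) || 0
      console.log(`${date}: +${delta} confirmed (7DMA ${delta7})`)
    }
  })
```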
43 | -------------------------------------------------------------------------------- /src/states_daily_to_csv.js: -------------------------------------------------------------------------------- 1 | const daily = require('../tmp/states_daily.json') 2 | const fs = require('fs') 3 | 4 | var CONFIRMED_CSV = 'date,TT,' 5 | var RECOVERED_CSV 6 | var DECEASED_CSV 7 | 8 | var headers = ['date', 'tt'] 9 | for (var key in daily.states_daily[0]) { 10 | if (key !== 'date' && key !== 'status' && key !== 'tt') { 11 | headers.push(key) 12 | CONFIRMED_CSV += key.toUpperCase() + ',' 13 | } 14 | } 15 | RECOVERED_CSV = CONFIRMED_CSV 16 | DECEASED_CSV = CONFIRMED_CSV 17 | 18 | console.log(headers) 19 | console.log(CONFIRMED_CSV) 20 | 21 | daily.states_daily.forEach(element => { 22 | switch (element.status) { 23 | case 'Confirmed': 24 | CONFIRMED_CSV += '\n' 25 | headers.forEach(header => { 26 | CONFIRMED_CSV += element[header] + ',' 27 | }) 28 | break 29 | case 'Recovered': 30 | RECOVERED_CSV += '\n' 31 | headers.forEach(header => { 32 | RECOVERED_CSV += element[header] + ',' 33 | }) 34 | break 35 | case 'Deceased': 36 | DECEASED_CSV += '\n' 37 | headers.forEach(header => { 38 | DECEASED_CSV += element[header] + ',' 39 | }) 40 | break 41 | } 42 | }) 43 | 44 | const CSV_PATH = 'tmp/states_daily_csv/' 45 | if (!fs.existsSync(CSV_PATH)) { 46 | fs.mkdirSync(CSV_PATH, { recursive: true }) 47 | } 48 | 49 | fs.writeFileSync(CSV_PATH + 'confirmed.csv', CONFIRMED_CSV) 50 | fs.writeFileSync(CSV_PATH + 'recovered.csv', RECOVERED_CSV) 51 | fs.writeFileSync(CSV_PATH + 'deceased.csv', DECEASED_CSV) 52 | -------------------------------------------------------------------------------- /projects/bots.md: -------------------------------------------------------------------------------- 1 | # Bots using this API 2 | 3 | ## _Discord bots:_ 4 | 5 | - [Indian Bot | भारतीय स्वायत्तसेवा](https://discord.com/oauth2/authorize?scope=bot&client_id=583897295267954697&permissions=1342565446) (by [gouenji-shuuya](https://github.com/gouenji-shuuya)) 6 | 7 | - [COVID-19 India Bot](https://discordapp.com/oauth2/authorize?&client_id=723409740083757166&scope=bot&permissions=8) (by [vj1224](https://github.com/VJ1224)) 8 | 9 | --- 10 | 11 | ## _Reddit bots:_ 12 | 13 | - [COVID-19 Reddit BOT](https://github.com/parshnt/covid-19-bot) (by [@parshnt](https://github.com/parshnt)) 14 | 15 | --- 16 | 17 | ## _Telegram bots:_ 18 | 19 | - [CoVID19 India Patients Analyzer and Alerts](https://github.com/xsreality/covid19) 20 | 21 | - [CovidBot: CoVID19 Live Stats Chatbot](https://github.com/Tele-Bots/CovidBot) (by [@gurrrung](https://github.com/gurrrung)) 22 | 23 | - [covid19indiatracker_bot](https://github.com/cibinjoseph/covid19indiatracker_bot) 24 | 25 | - [INDIA COVID-19 Google Map TRACKER](https://goo.gl/maps/U32Ex1gWQxmc6Aot8) (by [@jeethesh-kotian](https://github.com/jeethesh-kotian)) 26 | 27 | - [Covid19WorldStats](https://github.com/ravindraten/Covid19WorldStats) (by [@RavindraNayak](https://github.com/ravindraten)) 28 | 29 | - [Covid19India Stats & News] () (by [@akhiltrivedi]) 30 | 31 | - [Karnataka COVID 19 Counts](https://t.me/KarCovid19Bot) (by [@AbhishekPednekar84](https://github.com/AbhishekPednekar84/covid19-kar-bot)) 32 | 33 | --- 34 | 35 | ## _Twitter bots:_ 36 | 37 | - [COVID-19 Twitter BOT](https://twitter.com/covidapp_in) (by Prabhakar Thota [@myinnos](https://github.com/myinnos)) 38 | -------------------------------------------------------------------------------- /documentation/statedaily.md: 
--------------------------------------------------------------------------------
 1 | # states_daily.json
 2 | 
 3 | ## Description
 4 | 
 5 | This API gives the daily statistics for all states across the confirmed, recovered and deceased categories.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | {
11 |   "states_daily": [
12 |     {
13 |       "an": "0",
14 |       "ap": "1",
15 |       "ar": "0",
16 |       "as": "0",
17 |       "br": "0",
18 |       "ch": "0",
19 |       "ct": "0",
20 |       "date": "14-Mar-20",
21 |       "dd": "0",
22 |       "dl": "7",
23 |       "dn": "0",
24 |       "ga": "0",
25 |       "gj": "0",
26 |       "hp": "0",
27 |       "hr": "14",
28 |       "jh": "0",
29 |       "jk": "2",
30 |       "ka": "6",
31 |       "kl": "19",
32 |       "la": "0",
33 |       "ld": "0",
34 |       "mh": "14",
35 |       "ml": "0",
36 |       "mn": "0",
37 |       "mp": "0",
38 |       "mz": "0",
39 |       "nl": "0",
40 |       "or": "0",
41 |       "pb": "1",
42 |       "py": "0",
43 |       "rj": "3",
44 |       "sk": "0",
45 |       "status": "Confirmed",
46 |       "tg": "1",
47 |       "tn": "1",
48 |       "tr": "0",
49 |       "tt": "81",
50 |       "un": "0",
51 |       "up": "12",
52 |       "ut": "0",
53 |       "wb": "0"
54 |     },
55 |     .
56 |     .
57 |     .
58 |   ]
59 | }
60 | ```
61 | 
62 | - The API gives an array of objects where each object contains the following:
63 |   - stateCode : Gives the two letter state code for a state.
64 |   - status : Represents the category - Confirmed, Recovered, Deceased.
65 |   - date : The date to which the object belongs.
66 | 
67 | - The value given against each state code is the delta for that status, for that state, on that date.
68 | 
69 | ## Usage and caveats
70 | 
71 | - The category "tt" represents the total count for that date/status combination.
72 | - The API does not consider the Migrated category.
73 | 
--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | const drive = require('drive-db')
 3 | const { DIR } = require('./constants')
 4 | 
 5 | const fetchData = async ({ sheet, tabs }) => {
 6 |   const data = await Promise.all(
 7 |     Object.keys(tabs).map(async tab => {
 8 |       return {
 9 |         [tab]: await drive({ sheet, tab: tabs[tab] })
10 |       }
11 |     })
12 |   )
13 | 
14 |   let mergedData = {}
15 | 
16 |   data.forEach(obj => {
17 |     mergedData = { ...mergedData, ...obj }
18 |   })
19 | 
20 |   return mergedData
21 | }
22 | 
23 | const writeData = async ({ file, data }) => {
24 |   const fileContent = JSON.stringify(sortObjByKey(data), null, '\t')
25 |   if (!fs.existsSync(DIR)) {
26 |     fs.mkdirSync(DIR, { recursive: true })
27 |   }
28 |   return fs.writeFileSync(DIR + file, fileContent)
29 | }
30 | 
31 | const sortObjByKey = (value) => {
32 |   return (typeof value === 'object')
33 |     ? (Array.isArray(value)
34 |       ? value.map(sortObjByKey)
35 |       : Object.keys(value).sort().reduce(
36 |         (o, key) => {
37 |           const v = value[key]
38 |           o[key] = sortObjByKey(v)
39 |           return o
40 |         }, {})
41 |     )
42 |     : value
43 | }
44 | 
45 | const task = async ({ sheet, tabs, file }) => {
46 |   console.log(`Fetching data from sheet: ${sheet}... tab: ${JSON.stringify(tabs)} file: ${file}`)
47 |   try {
48 |     const data = await fetchData({ sheet, tabs })
49 |     console.log(`Writing data to json file: ${file}...`)
50 |     await writeData({ file, data })
51 |     console.log('Operation completed! 
Created: ' + file)
52 |   } catch (e) {
53 |     console.log(e)
54 |     process.stderr.write(String(e)) // stderr.write expects a string, not an Error object
55 |     process.exit(1)
56 |   }
57 | }
58 | 
59 | module.exports = {
60 |   fetchData,
61 |   writeData,
62 |   task
63 | }
64 | 
--------------------------------------------------------------------------------
/documentation/statedistrictwise.md:
--------------------------------------------------------------------------------
 1 | # state_district_wise.json
 2 | 
 3 | ## Description
 4 | 
 5 | This API gives the district-level details of states, including the totals for all categories and the deltas for **the current day**.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | {
11 |   "State Name": {
12 |     "districtData": {
13 |       "DistrictName": {
14 |         "notes": "Any notes to indicate special cases for districts",
15 |         "active": totalActiveCases,
16 |         "confirmed": totalConfirmedCases,
17 |         "deceased": totalDeceasedCases,
18 |         "recovered": totalRecoveredCases,
19 |         "delta": {
20 |           "confirmed": newCasesForToday,
21 |           "deceased": newDeceasedForToday,
22 |           "recovered": newRecoveredForToday
23 |         }
24 |       }
25 |     },
26 |     "statecode": "twoLetterStateCode"
27 |   },
28 |   .
29 |   .
30 |   .
31 | }
32 | ```
33 | 
34 | The values for delta and totals are per district per state combination.
35 | 
36 | ## Usage and caveats
37 | 
38 | - Active numbers are calculated as Active = Confirmed - Recovered - Deceased - Migrated Others. This applies to both delta and total counts.
39 | - The values for all the districts of a state need to be consumed along with the "Unknown" district if one exists. These are the cases for which district details are not available as of now.
40 | - There are possibilities of the numbers for districts going negative. This can happen in multiple scenarios:
41 |   - Deltas could go negative if the state bulletins show a lower cumulative count for a category from one day to the next.
42 |   - There is a remote possibility of the total active count going negative if the state announced recoveries/deceased for a district but did not provide an updated cumulative confirmed count.
43 | - The district names are the ones given in state bulletins. So sometimes the district names might have values like "Other State", "BSF Camp" and so on. These are due to the states categorising cases into those broad categories.
44 | - The values of delta reset to 0 for all districts once a new case gets reported anytime after 7am IST the next day.
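
As a sketch of the consumption pattern described above (summing every district of a state, including "Unknown"), the snippet below uses node-fetch, already a dependency of this repo. The URL is illustrative, not a guaranteed endpoint:

```js
const fetch = require('node-fetch')

// Illustrative URL: substitute the actual host serving the JSON.
const URL = 'https://api.covid19india.org/state_district_wise.json'

fetch(URL)
  .then(res => res.json())
  .then(data => {
    for (const [state, { districtData }] of Object.entries(data)) {
      // Sum across every district, including "Unknown", as the caveats advise.
      const confirmed = Object.values(districtData)
        .reduce((sum, d) => sum + d.confirmed, 0)
      console.log(`${state}: ${confirmed} confirmed`)
    }
  })
```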
45 | -------------------------------------------------------------------------------- /src/district_data_generator.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const rawDistData = require('../tmp/district_wise.json') 3 | 4 | const StateDistrictWiseData = rawDistData.districts.reduce((acc, row) => { 5 | if (row.district === 'Unknown' && +row.confirmed === 0 && +row.recovered === 0 && +row.deceased === 0) { 6 | return acc 7 | } 8 | const stateName = row.state 9 | if (!acc[stateName]) { 10 | acc[stateName] = { 11 | districtData: {}, 12 | statecode: row.statecode 13 | } 14 | } 15 | const districtName = row.district 16 | if (!acc[stateName].districtData[districtName]) { 17 | acc[stateName].districtData[districtName] = { 18 | notes: '', 19 | active: 0, 20 | confirmed: 0, 21 | migratedother: 0, 22 | deceased: 0, 23 | recovered: 0, 24 | delta: { 25 | confirmed: 0, 26 | deceased: 0, 27 | recovered: 0 28 | } 29 | } 30 | } 31 | const currentDistrict = acc[stateName].districtData[districtName] 32 | currentDistrict.notes = row.districtnotes 33 | currentDistrict.active = +row.active 34 | currentDistrict.confirmed = +row.confirmed 35 | currentDistrict.migratedother = +row.migratedother 36 | currentDistrict.recovered = +row.recovered 37 | currentDistrict.deceased = +row.deceased 38 | currentDistrict.delta.confirmed = +row.deltaconfirmed 39 | currentDistrict.delta.deceased = +row.deltadeceased 40 | currentDistrict.delta.recovered = +row.deltarecovered 41 | return acc 42 | }, {}) 43 | 44 | const stateDistrictWiseDataV2 = Object.keys(StateDistrictWiseData).map(state => { 45 | const districtData = StateDistrictWiseData[state].districtData 46 | return { 47 | state, 48 | statecode: StateDistrictWiseData[state].statecode, 49 | districtData: Object.keys(districtData).map(district => { 50 | return { district, ...districtData[district] } 51 | }) 52 | } 53 | }) 54 | var mainData = JSON.stringify(StateDistrictWiseData, null, 2) 55 | fs.writeFileSync('./tmp/state_district_wise.json', mainData) 56 | fs.writeFileSync('./tmp/v2/state_district_wise.json', JSON.stringify(stateDistrictWiseDataV2, null, 2)) 57 | -------------------------------------------------------------------------------- /src/obsolete/split_raw_data.js_bak: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const raw_data = require('../tmp/raw_data.json') 3 | 4 | console.log('fetched rawdata') 5 | console.log(raw_data.raw_data.length) 6 | 7 | // var pid = []; 8 | 9 | var states = {} 10 | var statecodes = [] 11 | 12 | var dateitems = {} 13 | var dates = [] 14 | 15 | // var dir_pid = './tmp/cases/pid/'; 16 | var dir_state = './tmp/cases/state/' 17 | var dir_date = './tmp/cases/date/' 18 | 19 | // statecode 20 | // if (!fs.existsSync(dir_pid)) { 21 | // fs.mkdirSync(dir_pid, { recursive: true }); 22 | // } 23 | 24 | if (!fs.existsSync(dir_state)) { 25 | fs.mkdirSync(dir_state, { recursive: true }) 26 | } 27 | if (!fs.existsSync(dir_date)) { 28 | fs.mkdirSync(dir_date, { recursive: true }) 29 | } 30 | 31 | raw_data.raw_data.forEach(element => { 32 | // pid.push(element["patientnumber"]) 33 | // fs.writeFileSync(dir + element["patientnumber"]+'.json', JSON.stringify(element, null, 2)); //this takes more than a minute 34 | if (!states[element.statecode]) { 35 | states[element.statecode] = { 36 | raw_data: [] 37 | } 38 | } 39 | states[element.statecode].raw_data.push(element) 40 | 41 | date = 
element.dateannounced.replace(/\//g, '-')
42 |   if (!dateitems[date]) {
43 |     dateitems[date] = {
44 |       raw_data: []
45 |     }
46 |   }
47 |   dateitems[date].raw_data.push(element)
48 | })
49 | 
50 | for (key in dateitems) {
51 |   if (!key) continue
52 |   dates.push(key)
53 |   if (!fs.existsSync(dir_date + key)) {
54 |     fs.mkdirSync(dir_date + key, { recursive: true })
55 |   }
56 |   fs.writeFileSync(dir_date + key + '/index.json', JSON.stringify(dateitems[key], null, 2))
57 | }
58 | fs.writeFileSync(dir_date + 'index.json', JSON.stringify(dates, null, 2))
59 | 
60 | for (key in states) {
61 |   if (!key) continue
62 |   statecodes.push(key)
63 |   if (!fs.existsSync(dir_state + key)) {
64 |     fs.mkdirSync(dir_state + key, { recursive: true })
65 |   }
66 |   fs.writeFileSync(dir_state + key + '/index.json', JSON.stringify(states[key], null, 2))
67 | }
68 | fs.writeFileSync(dir_state + 'index.json', JSON.stringify(statecodes, null, 2))
69 | 
--------------------------------------------------------------------------------
/main.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -eu
 4 | 
 5 | repo_uri="https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
 6 | remote_name="origin"
 7 | main_branch="master"
 8 | gh_pages_branch="gh-pages"
 9 | 
10 | 
11 | git config user.name "$GITHUB_ACTOR"
12 | git config user.email "${GITHUB_ACTOR}@bots.github.com"
13 | 
14 | 
15 | git checkout "$gh_pages_branch"
16 | 
17 | mkdir tmp
18 | mkdir tmp/resources
19 | mkdir tmp/v2
20 | mkdir tmp/updatelog
21 | 
22 | cp ./data.json ./tmp/data_prev.json
23 | # cp ./*.json ./tmp/
24 | cp ./raw_data*.json ./tmp
25 | cp ./misc.json ./tmp
26 | cp ./deaths_recoveries*.json ./tmp
27 | cp ./locales*.json ./tmp
28 | cp ./states_daily.json ./tmp
29 | cp ./district_wise.json ./tmp
30 | cp -r ./updatelog ./tmp
31 | cp -r ./csv ./tmp
32 | 
33 | # temporary fixes. remove once Google Sheets is normal
34 | cp ./data.json ./tmp
35 | 
36 | git checkout "$main_branch"
37 | 
38 | 
39 | node src/sheet-to-json_generic.js
40 | 
41 | cp README.md tmp/
42 | cp -r documentation/ tmp/
43 | cp -r projects/ tmp/
44 | 
45 | node src/sheets-to-csv.js
46 | 
47 | node src/states_daily_to_csv.js
48 | node src/district_data_generator.js
49 | # node src/concat_data.js
50 | # node src/split_raw_data.js
51 | # node src/snapshot_zones.js
52 | # node src/generate_districts_daily.js
53 | node src/generate_locale.js
54 | # node src/ultimate_parser.js
55 | # pip3 install --quiet -r requirements.txt
56 | # python3 src/geocoder.py
57 | # python3 src/parser_v3.py
58 | python3 src/parser_v4.py
59 | # python3 src/build_raw_data.py
60 | 
61 | node src/sanity_check.js
62 | node src/generate_activity_log.js
63 | 
64 | git checkout "$gh_pages_branch"
65 | 
66 | rm tmp/data_prev.json
67 | 
68 | cp -r tmp/* .
69 | rm -r tmp/
70 | 
71 | 
72 | 
73 | git add .
74 | set +e # grep exits nonzero when it finds no matches; don't abort the script on that.
75 | 76 | if git status | grep 'new file\|modified' 77 | then 78 | set -e 79 | git commit -am "data updated on - $(date)" 80 | git remote set-url "$remote_name" "$repo_uri" # includes access token 81 | git push --force-with-lease "$remote_name" "$gh_pages_branch" 82 | else 83 | set -e 84 | echo "No changes since last run" 85 | fi 86 | 87 | echo "finish" 88 | -------------------------------------------------------------------------------- /lib/notify_tg.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | const fetch = require('node-fetch') 3 | // CHAT_ID = "-1001363035222"; // Core 4 | const CHAT_ID = '-1001450930419' // Mods 5 | const BOT_TOKEN = process.env.STUCK_BOT 6 | const GITHUB_TOKEN = process.env.GITHUB_TOKEN 7 | 8 | const GITHUB_WORKFLOW_URL = 'https://github.com/' + process.env.GITHUB_REPOSITORY + '/actions/runs/' + process.env.GITHUB_RUN_ID 9 | var e = process.argv[2] 10 | if (e === 'success') { 11 | if (fs.existsSync('/tmp/apidata_iumessage')) { 12 | var apiDataIUmessage = fs.readFileSync('/tmp/apidata_iumessage', 'utf8') 13 | var apidataiutable = fs.readFileSync('/tmp/apidata_iutable', 'utf8') 14 | var url = encodeURI('https://api.telegram.org/bot' + BOT_TOKEN + '/editMessageText?message_id=929&chat_id=@covid19indiaorg_updates&parse_mode=Markdown&text=' + 15 | apidataiutable) 16 | const settings = { method: 'Get' } 17 | fetch(url, settings).then(res => res.json()) 18 | .then(json => console.log(json)) 19 | url = encodeURI('https://api.telegram.org/bot' + BOT_TOKEN + '/sendMessage?chat_id=-1001478052719&parse_mode=Markdown&text=' + 20 | apidataiutable) 21 | fetch(url, settings).then(res => res.json()) 22 | .then(json => console.log(json)) 23 | url = encodeURI('https://api.telegram.org/bot' + BOT_TOKEN + '/sendmessage?' 
+ 24 | 'disable_web_page_preview=true&disable_notification=true&parse_mode=Markdown&chat_id=-1001449683810&text=' + apiDataIUmessage) 25 | fetch(url, settings).then(res => res.json()).then(json => console.log(json)) 26 | } 27 | } else { 28 | var err = fs.readFileSync('/tmp/apidata_err', 'utf8') 29 | console.log('Sending the following to core') 30 | console.log(err) 31 | err = err.replace(new RegExp(BOT_TOKEN, 'g'), '****') 32 | err = err.replace(new RegExp(GITHUB_TOKEN, 'g'), '****') 33 | console.log(err) 34 | var tempUrl = 'https://api.telegram.org/bot' + BOT_TOKEN + 35 | '/sendmessage?disable_web_page_preview=true&chat_id=' + CHAT_ID + '&text=GitHub Action Status: ' + e + '\n' + GITHUB_WORKFLOW_URL + '\n\n' + err 36 | url = encodeURI(tempUrl) 37 | const settings = { method: 'Get' } 38 | fetch(url, settings).then(res => res.json()) 39 | .then(json => { console.log(json) }) 40 | } 41 | -------------------------------------------------------------------------------- /src/obsolete/sheet-to-json.js_bak: -------------------------------------------------------------------------------- 1 | const moment = require('moment') 2 | const rawData = require('./raw_data') 3 | const { fetchData, writeData } = require('./lib') 4 | const { SHEET, SHEET_STATEWISE_TAB, SHEET_CASES_TIME_SERIES_TAB, SHEET_KEY_VALUES_TAB, SHEET_Tested_Numbers_ICMR_Data, FILE_DATA } = require('./lib/constants') 5 | 6 | const tabs = { 7 | statewise: SHEET_STATEWISE_TAB, 8 | cases_time_series: SHEET_CASES_TIME_SERIES_TAB, 9 | key_values: SHEET_KEY_VALUES_TAB, 10 | tested: SHEET_Tested_Numbers_ICMR_Data 11 | } 12 | 13 | function getDelta (state) { 14 | return rawData.raw_data.reduce((stat, row) => { 15 | const stateName = row.detectedstate 16 | const isToday = moment().utcOffset(330).isSame(moment(row.dateannounced, 'DD-MM-YYYY'), 'day') 17 | if (stateName && (stateName === state || state === 'Total') && isToday) { 18 | const currentStatus = row.currentstatus 19 | if (currentStatus) { 20 | stat.confirmed += 1 21 | switch (currentStatus) { 22 | case 'Hospitalized': 23 | stat.active += 1 24 | break 25 | case 'Recovered': 26 | stat.recovered += 1 27 | break 28 | case 'Deceased': 29 | stat.deaths += 1 30 | break 31 | } 32 | } else { 33 | console.error('Current status is empty in sheet for patient:', row.patientnumber) 34 | } 35 | } 36 | return stat 37 | }, { active: 0, confirmed: 0, deaths: 0, recovered: 0 }) 38 | } 39 | 40 | async function task () { 41 | console.log(`Fetching data from sheets: ${SHEET}...`) 42 | const data = await fetchData({ sheet: SHEET, tabs }) 43 | data.statewise = data.statewise.map(data => Object.assign(data, { delta: getDelta(data.state) })) 44 | console.log(`Writing data to json file: ${FILE_DATA}...`) 45 | await writeData({ file: FILE_DATA, data }) 46 | console.log('Operation completed!') 47 | } 48 | 49 | (async function main () { 50 | console.log('Running task on start...') 51 | await task() 52 | console.log('Created Json File With Updated Contents') 53 | })() 54 | 55 | // source https://github.com/reustle/covid19japan/blob/master/scripts/cache-spreadsheet-data/cache-sheet.js , and made the changes accordingly 56 | -------------------------------------------------------------------------------- /src/obsolete/raw_data-to-state_district_wise_data.js_bak: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const moment = require('moment') 3 | const rawData = require('../tmp/raw_data.json') 4 | 5 | console.log('Starting district wise data processing') 
 6 | 
 7 | const StateDistrictWiseData = rawData.raw_data.reduce((acc, row) => {
 8 |   const todaysdate = moment().utcOffset(330) // set moment instance to IST timezone and fetch current IST time
 9 |   const isToday = moment(todaysdate).add(-420, 'm').isSame(moment(row.dateannounced + ' 00:00 +0530', 'DD-MM-YYYY HH:mm Z'), 'day') // Subtract 7 hours from IST time to persist values up to 7 AM and create moment instance from row.dateannounced in the IST timezone
10 |   let stateName = row.detectedstate
11 |   if (!stateName) {
12 |     // rows without a detected state are skipped (the old fallback to 'Unknown' was unreachable dead code)
13 |     return acc
14 |   }
15 |   if (!acc[stateName]) {
16 |     acc[stateName] = { districtData: {} }
17 |   }
18 |   let districtName = row.detecteddistrict
19 |   if (!districtName) {
20 |     districtName = 'Unknown'
21 |   }
22 |   if (!acc[stateName].districtData[districtName]) {
23 |     acc[stateName].districtData[districtName] = {
24 |       // active: 0,
25 |       confirmed: 0,
26 |       // deaths: 0,
27 |       lastupdatedtime: '',
28 |       // recovered: 0,
29 |       delta: {
30 |         confirmed: 0
31 |       }
32 |     }
33 |   }
34 |   const currentDistrict = acc[stateName].districtData[districtName]
35 | 
36 |   currentDistrict.confirmed++
37 |   if (isToday) {
38 |     currentDistrict.delta.confirmed++
39 |   }
40 |   // if(row.currentstatus === 'Hospitalized') {
41 |   //   currentDistrict.active++;
42 |   // } else if(row.currentstatus === 'Deceased') {
43 |   //   currentDistrict.deaths++;
44 |   // } else if(row.currentstatus === 'Recovered') {
45 |   //   currentDistrict.recovered++;
46 |   // }
47 | 
48 |   return acc
49 | }, {})
50 | 
51 | const stateDistrictWiseDataV2 = Object.keys(StateDistrictWiseData).map(state => {
52 |   const districtData = StateDistrictWiseData[state].districtData
53 |   return {
54 |     state,
55 |     districtData: Object.keys(districtData).map(district => {
56 |       return { district, ...districtData[district] }
57 |     })
58 |   }
59 | })
60 | 
61 | fs.writeFileSync('./tmp/state_district_wise.json', JSON.stringify(StateDistrictWiseData, null, 2))
62 | fs.writeFileSync('./tmp/v2/state_district_wise.json', JSON.stringify(stateDistrictWiseDataV2, null, 2))
63 | console.log('Starting district wise data processing ...done')
64 | 
--------------------------------------------------------------------------------
/documentation/statetestdata.md:
--------------------------------------------------------------------------------
 1 | # state_test_data.json
 2 | 
 3 | ## Description
 4 | 
 5 | This API gives the daily auxiliary data for states wherever it is available.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | "states_tested_data": [
11 |   {
12 |     "coronaenquirycalls": "",
13 |     "cumulativepeopleinquarantine": "",
14 |     "negative": "1210",
15 |     "numcallsstatehelpline": "",
16 |     "numicubeds": "",
17 |     "numisolationbeds": "50",
18 |     "numventilators": "",
19 |     "peopleinicu": "",
20 |     "peopleonventilators": "",
21 |     "populationncp2019projection": "397000",
22 |     "positive": "12",
23 |     "source1": "https://t.me/indiacovid/2550",
24 |     "source2": "",
25 |     "state": "Andaman and Nicobar Islands",
26 |     "tagpeopleinquarantine": "",
27 |     "tagtotaltested": "Samples Sent",
28 |     ~~"testpositivityrate": "0.86%"~~,
29 |     ~~"testspermillion": "3534"~~,
30 |     ~~"testsperpositivecase": "117"~~,
31 |     ~~"testsperthousand": "3.53"~~,
32 |     "totaln95masks": "",
33 |     "totalpeoplecurrentlyinquarantine": "",
34 |     "totalpeoplereleasedfromquarantine": "",
35 |     "totalppe": "",
36 |     "totaltested": "1403",
37 |     "unconfirmed": "181",
38 |     "updatedon": "17/04/2020"
39 |   },
40 |   .
41 |   .
42 |   .
43 | ]
44 | ```
45 | 
46 | - updatedon: This provides the date on which the testing report was announced by the relevant authority.
47 | - tagtotaltested: This indicates the significance of the totaltested value. Not all the numbers are actual tested numbers. As of now, this can have 4 values: samplessent, tested, reportsreceived and samplescollected.
48 | - totaltested: This gives the number as given in the report for the tagtotaltested category.
49 | - unconfirmed: This is the number as given in the report for those tests that aren't confirmed yet (mostly reports not yet received).
50 | - source1/2: These provide the links for the report.
51 | - state: Provides the name of the state to which the report belongs.
52 | - The rest of the fields are self-explanatory. The values for these are populated whenever the report contains values for those fields. Otherwise they are kept blank.
53 | 
54 | ## Usage and caveats
55 | 
56 | - The values in the positive and negative columns need to be consumed with caution. Often the testing reports are separate from the actual daily bulletins issued by states. This means the number of positives and negatives declared in a testing bulletin might not match the state's daily bulletin, mostly due to the timing of the two bulletins.
57 | - The populationncp2019projection value is derived from [this report](https://nhm.gov.in/New_Updates_2018/Report_Population_Projection_2019.pdf).
--------------------------------------------------------------------------------
/projects/mobile_apps.md:
--------------------------------------------------------------------------------
 1 | # Mobile apps using this API
 2 | 
 3 | ## _Android apps:_
 4 | 
 5 | - __[Covid-19 Status](https://github.com/Coders-Of-XDA-OT/covid19-status-android)__ is a free and open source Android application which shows the current status of Covid-19 in India and around the world. It has a beautiful, simple and fast UI developed by __[Vipul Jha](https://github.com/lordarcadius)__. Download & install the latest apk from __[here](https://www.vipuljha.com/covid)__.
 6 | 
 7 | - [Corona Track] () (by Cyberdoc) Worldwide stats with containment zone data for all major cities across India, plus testing centre and hospital info.
 8 | 
 9 | - [Covid-19 India Android App](https://github.com/GRajin/Covid-19IndiaApp) (by Rajin Gangadharan [@GRajin](https://github.com/GRajin)). This app shows the cases of the Covid-19 virus in India. It is a lightweight application (4 MB) with DayWise, StateWise and DistrictWise reports, including graphs. To download the APK, click [here](https://github.com/GRajin/Covid-19IndiaApp/releases/download/Covid19_India_v1.0/Covid-19.India.apk). Feel free to fork the project and add your own features!
10 | 
11 | - [Covid-19 India Android (React-Native) App](https://github.com/RajeevTomar/Covid-19) (by Rajeev Tomar [@RajeevTomar](https://github.com/RajeevTomar/)) --- [Download Android App](http://positivemind.co.in/apk/covid-19_v1.1.apk)
12 | 
13 | - [react-native-Covid19India](https://github.com/tejachundru/react-native-Covid19India) (by Teja Chundru [@tejaChundru](https://github.com/tejachundru/))
14 | 
15 | - [COVID19-India-Tracker](https://github.com/Gopal-Dahale/COVID19-India-Tracker) (by Gopal Dahale [@Gopal-Dahale](https://github.com/Gopal-Dahale)) shows the number of cases in the states and districts of India. It also features an interactive map to display Coronavirus spread across India.
16 | 
17 | - [COVID-19 India Tracker (Flutter), built for COVID-19 stats & patient tracing in India with a common code base for Android/iOS/Web](https://github.com/akashengginer/covid-19_tracker_india) (by Akash Gupta [@akashengginer](https://github.com/akashengginer)). To download the APK click [here](https://github.com/akashengginer/covid-19_tracker_india/blob/master/apk/app-release.apk)
18 | 
19 | - [Covid-19 India Tracker (MaterialDesign | LightWeight)](https://tinyurl.com/covid19IndiaTracker) (by Rahul Pandey [@rahulpandey1501](http://github.com/rahulpandey1501))
20 | 
21 | - [Covid-19 Tracker](http://covidapp.in) (by Prabhakar Thota [@myinnos](https://github.com/myinnos))
22 | 
23 | - [Covid-19 Tracker App](https://github.com/saif191020/covid-19-Tracker-App) (by Saif Basheer Ahamed [@saif191020](https://github.com/saif191020))
24 | 
25 | - __[Covid-19 India Tracker (covidapp.in)](http://covidapp.in)__ This app helps people find detailed status/stats of COVID-19 in India and other countries, along with the list of quarantined people in the respective states. __[Download / Get Android APK](https://drive.google.com/file/d/1n35tQndjuDJR8l1-zYHu6UBO-ZFibFuO/view)__ (by __Prabhakar Thota__ [@myinnos](https://github.com/myinnos))
26 | 
27 | - [NCovid19](http://covid.softycom.in) (by Rohan Mahindrakar [@rohan12](https://github.com/ROHAN12))
28 | 
29 | - [Covid-19 India Android (Flutter) App](https://github.com/iPrabhat404/covid19-flutter) (by Prabhat Pandey [@iPrabhat404](https://github.com/iPrabhat404))
30 | 
31 | - [Covid-19 India Android App](https://tinyurl.com/covid19indiask) (by Sachin K [@sachinkcrp](https://github.com/sachinkcrp))
32 | 
33 | - [COVID19 India](https://github.com/iamnishanth/Covid19India) (by Nishanth [@iamNishanth](https://github.com/iamnishanth))
34 | 
35 | - [Covid-19 Tracker](https://github.com/Mehatab/covid-19) (by Mehatab Shaikh [@mehatab](https://github.com/Mehatab))
36 | 
--------------------------------------------------------------------------------
/documentation/v4_data.md:
--------------------------------------------------------------------------------
 1 | # v4/data.min.json
 2 | 
 3 | ## Description
 4 | 
 5 | This is a comprehensive API that provides state- and district-level details as of the current day.
 6 | 
 7 | ## Structure
 8 | 
 9 | ```json
10 | {
11 |   "StateCode": {
12 |     "delta": {
13 |       "confirmed": <confirmedDeltaForState>,
14 |       "deceased": <deceasedDeltaForState>,
15 |       "recovered": <recoveredDeltaForState>,
16 |       "vaccinated": <vaccinatedDeltaForState>
17 |     },
18 |     "delta7": {
19 |       "confirmed": <7DmaConfirmedForState>,
20 |       "deceased": <7DmaDeceasedForState>,
21 |       "recovered": <7DmaRecoveredForState>,
22 |       "vaccinated": <7DmaVaccinatedForState>
23 |     },
24 |     "districts": {
25 |       "District1": {
26 |         "delta": {
27 |           "confirmed": <confirmedDeltaForDistrict>,
28 |           "deceased": <deceasedDeltaForDistrict>,
29 |           "recovered": <recoveredDeltaForDistrict>,
30 |           "vaccinated": <vaccinatedDeltaForDistrict>
31 |         },
32 |         "delta7": {
33 |           "confirmed": <7DmaConfirmedForDistrict>,
34 |           "deceased": <7DmaDeceasedForDistrict>,
35 |           "recovered": <7DmaRecoveredForDistrict>,
36 |           "vaccinated": <7DmaVaccinatedForDistrict>
37 |         },
38 |         "meta": {
39 |           "population": <districtPopulation>,
40 |           "tested": {
41 |             "last_updated": "yyyy-mm-dd",
42 |             "source": "uri"
43 |           },
44 |           "notes": "notesForDistrict"
45 |         },
46 |         "total": {
47 |           "confirmed": <totalConfirmedForDistrict>,
48 |           "deceased": <totalDeceasedForDistrict>,
49 |           "recovered": <totalRecoveredForDistrict>,
50 |           "tested": <totalTestedForDistrict>,
51 |           "vaccinated": <totalVaccinatedForDistrict>
52 |         }
53 |       },
54 |       .
55 |       .
56 |       .
57 |     },
58 |     "meta": {
59 |       "last_updated": "yyyy-mm-ddHH24:M:S+GMT",
60 |       "population": <statePopulation>,
61 |       "tested": {
62 |         "last_updated": "yyyy-mm-dd",
63 |         "source": "uri"
64 |       },
65 |       "notes": "notesForState"
66 |     },
67 |     "total": {
68 |       "confirmed": <totalConfirmedForState>,
69 |       "deceased": <totalDeceasedForState>,
70 |       "recovered": <totalRecoveredForState>,
71 |       "tested": <totalTestedForState>,
72 |       "vaccinated": <totalVaccinatedForState>
73 |     }
74 |   },
75 |   .
76 |   .
77 |   .
78 | }
79 | 
80 | ```
81 | 
82 | - The API is an object with keys corresponding to the two letter StateCode for each state.
83 | - Each State object has the following keys: __districts__, __delta__, __delta7__, __meta__ and __total__. The districts object is a hash whose keys represent the individual districts in the state. The remaining four keys have the same behaviour across state and district objects. They are explained below:
84 |   - meta: This substructure provides the following details:
85 |     - last_updated: This tells when the current state/district value was updated.
86 |     - population: This gives the population of the state (based on NCP projections) and districts (based on the 2011 census).
87 |     - tested: This has the source and last_updated values for the testing data of the current State/District.
88 |     - notes: This gives any special notes added at the State/District level.
89 |   - delta: This substructure contains the confirmed, deceased and recovered cases for the current day for the current State/District.
90 |   - delta7: This substructure contains the seven day moving average (7DMA) of confirmed, deceased and recovered cases, calculated with respect to the current day, for the current State/District.
91 |   - total: This substructure contains the confirmed, deceased and recovered cases till today for the current State/District.
92 | 
93 | ## Usage and Caveats
94 | 
95 | - This API has data that corresponds to the data seen on the website as of today. This includes cumulative, delta and testing numbers across states and districts.
96 | - The keys under __delta__, __meta__ and __total__ are present only if there is a corresponding value. For example, if a specific district does not see any change in recovery numbers today, the __recovered__ key under __delta__ for that district will not be present.
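
A consumption sketch for the caveat above: since keys under __delta__ may be absent, default them before use. node-fetch is already a project dependency; the URL below is illustrative:

```js
const fetch = require('node-fetch')

// Illustrative URL: substitute the actual host serving data.min.json.
const URL = 'https://api.covid19india.org/v4/min/data.min.json'

fetch(URL)
  .then(res => res.json())
  .then(data => {
    for (const [stateCode, state] of Object.entries(data)) {
      // delta keys exist only when the value changed today, so default to 0.
      const delta = state.delta || {}
      console.log(
        `${stateCode}: +${delta.confirmed || 0} confirmed, ` +
        `+${delta.recovered || 0} recovered, +${delta.deceased || 0} deceased`
      )
    }
  })
```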
97 | -------------------------------------------------------------------------------- /.github/workflows/javascript.yml: -------------------------------------------------------------------------------- 1 | name: Sheet to Json Workflow 2 | 3 | on: 4 | # schedule: 5 | # # every 20 minutes the job will run 6 | # - cron: "*/15 * * * *" 7 | # push: 8 | # branches: 9 | # - master 10 | repository_dispatch: 11 | types: [my-event] 12 | 13 | jobs: 14 | update-json-data: 15 | runs-on: ubuntu-latest 16 | timeout-minutes: 19 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - run: | 21 | git fetch --no-tags --prune --depth=1 origin +refs/heads/*:refs/remotes/origin/* 22 | - name: Cache node modules 23 | uses: actions/cache@v1 24 | env: 25 | cache-name: cache-node-modules 26 | with: 27 | path: ~/.npm # npm cache files are stored in `~/.npm` on Linux/macOS 28 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }} 29 | restore-keys: | 30 | ${{ runner.os }}-build-${{ env.cache-name }}- 31 | ${{ runner.os }}-build- 32 | ${{ runner.os }}- 33 | 34 | - name: Install Dependencies 35 | run: npm install 36 | 37 | - name: push 38 | run: | 39 | bash ./main.sh 2>> /tmp/apidata_err 40 | echo "the job completed" 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.github_token }} 43 | SHEET_ID_v1: ${{ secrets.SHEET_ID_v1 }} 44 | SHEET_ID_v2: ${{ secrets.SHEET_ID_v2 }} 45 | SHEET_ID_v3: ${{ secrets.SHEET_ID_v3 }} 46 | SHEET_ID_v4: ${{ secrets.SHEET_ID_v4 }} 47 | SHEET_ID_v5: ${{ secrets.SHEET_ID_v5 }} 48 | SHEET_ID_v6: ${{ secrets.SHEET_ID_v6 }} 49 | SHEET_ID_v7: ${{ secrets.SHEET_ID_v7 }} 50 | SHEET_ID_v8: ${{ secrets.SHEET_ID_v8 }} 51 | SHEET_ID_v9: ${{ secrets.SHEET_ID_v9 }} 52 | SHEET_ID_v10: ${{ secrets.SHEET_ID_v10 }} 53 | SHEET_ID_v11: ${{ secrets.SHEET_ID_v11 }} 54 | SHEET_ID_v12: ${{ secrets.SHEET_ID_v12 }} 55 | SHEET_ID_v13: ${{ secrets.SHEET_ID_v13 }} 56 | SHEET_ID_v14: ${{ secrets.SHEET_ID_v14 }} 57 | SHEET_ID_v15: ${{ secrets.SHEET_ID_v15 }} 58 | SHEET_ID_v16: ${{ secrets.SHEET_ID_v16 }} 59 | SHEET_ID_v17: ${{ secrets.SHEET_ID_v17 }} 60 | SHEET_ID_v18: ${{ secrets.SHEET_ID_v18 }} 61 | SHEET_ID_v19: ${{ secrets.SHEET_ID_v19 }} 62 | SHEET_ID_v20: ${{ secrets.SHEET_ID_v20 }} 63 | SHEET_ID_v21: ${{ secrets.SHEET_ID_v21 }} 64 | SHEET_ID_v22: ${{ secrets.SHEET_ID_v22 }} 65 | SHEET_ID_v23: ${{ secrets.SHEET_ID_v23 }} 66 | SHEET_ID_v24: ${{ secrets.SHEET_ID_v24 }} 67 | SHEET_ID_v25: ${{ secrets.SHEET_ID_v25 }} 68 | SHEET_ID_v26: ${{ secrets.SHEET_ID_v26 }} 69 | SHEET_ID_v27: ${{ secrets.SHEET_ID_v27 }} 70 | SHEET_ID_v28: ${{ secrets.SHEET_ID_v28 }} 71 | SHEET_ID_v29: ${{ secrets.SHEET_ID_v29 }} 72 | SHEET_ID_v30: ${{ secrets.SHEET_ID_v30 }} 73 | SHEET_ID_v31: ${{ secrets.SHEET_ID_v31 }} 74 | SHEET_ID_v32: ${{ secrets.SHEET_ID_v32 }} 75 | RESOURCES_SHEET_ID: ${{ secrets.RESOURCES_SHEET_ID }} 76 | SHEET_LOCALE: ${{ secrets.SHEET_LOCALE }} 77 | STUCK_BOT: ${{ secrets.STUCK_BOT }} 78 | - name: show job status 79 | if: always() 80 | run: | 81 | git checkout master 82 | node lib/notify_tg.js $status 83 | env: 84 | status: ${{job.status}} 85 | STUCK_BOT: ${{ secrets.STUCK_BOT }} 86 | GITHUB_TOKEN: ${{ secrets.github_token }} 87 | - name: Repository Dispatch 88 | if: always() # ${{ !cancelled() }} 89 | uses: actions/github-script@v2 90 | with: 91 | github-token: ${{secrets.JUN_TOKEN2}} 92 | script: | 93 | const fs = require('fs'); 94 | await github.repos.createDispatchEvent({ 95 | owner: context.repo.owner, 96 | repo: context.repo.repo, 97 | event_type: "my-event", 98 | client_payload: { 
greeting: "I'm Mr Meeseek. Loot at me!" } 99 | }); 100 | -------------------------------------------------------------------------------- /projects/miscellaneous.md: -------------------------------------------------------------------------------- 1 | # Other Trackers/Dashboards using this API 2 | 3 | - [Tracker](https://covidstat.info/) 4 | - By: [@skavinvarnan](https://github.com/skavinvarnan) 5 | 6 | --- 7 | 8 | - [Statistics and Predictive Analysis (India)](https://gnsp.in/covid19/) 9 | - By: [@GnsP](https://github.com/GnsP) 10 | 11 | --- 12 | 13 | - [TN Specific](https://covid19trackerbk.netlify.app/) 14 | - [Repo](https://github.com/dynamicbalaji/covid19-tracker) 15 | 16 | --- 17 | 18 | - [Tracker](https://livecovid.in/) 19 | - By: [@anamritraj](https://github.com/anamritraj/livecovid.in-webapp) 20 | 21 | --- 22 | 23 | - [India & World Tracker](http://tcovid19.herokuapp.com/) 24 | - By: [@thecoducer](https://github.com/thecoducer) 25 | 26 | --- 27 | 28 | - [COVID-19 Track](http://github.com/adarshbalu/covid_track/) 29 | - By: [@adarshbalu](https://github.com/adarshbalu) 30 | 31 | --- 32 | 33 | - [Maharashtra COVID-19 Dashboard/Tracker](http://covid.pranavsheth.com/) 34 | - By: [@pranavs80](https://github.com/pranavs80) 35 | 36 | --- 37 | 38 | - [COVID-19 India Tracker](https://covidindiatracker.netlify.app/) 39 | - By: [@PrinceSumberia](https://github.com/PrinceSumberia) 40 | 41 | --- 42 | 43 | - [COVID-19 Tracker/Visualizer](https://coronago.cf/) 44 | - By: [@kaushikbhat07](https://github.com/kaushikbhat07) 45 | 46 | --- 47 | 48 | - [Bihar COVID-19 Dashboard/Tracker](https://coronainbihar.github.io/) 49 | - By: [@anandv01](https://github.com/anandv01) 50 | 51 | --- 52 | 53 | - [COVID19 Tracker/Predictor](https://track-covid-19ind.herokuapp.com/) 54 | - By: [@manoj](https://github.com/ManojNallusamy) 55 | 56 | --- 57 | 58 | - [Covid19-Tracker-App](https://harshitchauhan.github.io/Covid19-Tracker-App/) 59 | - By: [@HarshitChauhan](https://github.com/HarshitChauhan) 60 | 61 | --- 62 | 63 | - [COVID-19 Tracker App](https://corona-india.live/) 64 | - By: [@sandeshchoudhary](https://github.com/sandeshchoudhary) 65 | 66 | --- 67 | 68 | - [COVID-19 India Tracker](https://covid19indiaa.000webhostapp.com) 69 | - By: [@Samirrana](https://github.com/samirrana1011) 70 | 71 | --- 72 | 73 | - [Corona Virus Tracker](https://virtuosars.github.io/CovidLive) 74 | - By: [@VirtuosArs](https://github.com/VirtuosArs) 75 | 76 | --- 77 | 78 | - [COVID-19 Dashboard India/World](https://nkjcovid19.herokuapp.com/india) 79 | - By: [@nithinkjoy-tech](https://github.com/nithinkjoy-tech) 80 | 81 | --- 82 | 83 | - [Covid19Bihar Tracker](https://covid19bihar.github.io/) 84 | - By: [@arrbxr](https://github.com/arrbxr) 85 | 86 | --- 87 | 88 | - [COVID19-Tracker App, UP Highlight](https://www.covid19-tracker.in/) 89 | - By: [@Deepak0404](https://github.com/Deepak0404) 90 | 91 | --- 92 | 93 | - [Covid-19 India Flutter App](https://github.com/curioustechizen/covid19india-flutter) 94 | - By: [@curioustechizen](https://github.com/curioustechizen) 95 | 96 | --- 97 | 98 | - [COVID-19 INDIA LOCATION BASED TRACKER](https://covid2.in) (District Screen) 99 | 100 | --- 101 | 102 | - [COVID-19 Tracker (Flutter App)](https://github.com/prateekKrOraon/covid19_tracker) 103 | - By: [@prateekKrOraon](https://github.com/prateekKrOraon) 104 | 105 | --- 106 | 107 | - [NCovid19](http://covid.softycom.in) 108 | - By: Rohan Mahindrakar [@rohan12](https://github.com/ROHAN12) 109 | 110 | --- 111 | 112 | - [COVID_19](https://web2.eu5.org) 113 | 
- By: [@rishigole42](https://github.com/rishigole42/)
114 | 
115 | ---
116 | 
117 | - [COVID DashBoard for Policy Makers](https://covid19kol.herokuapp.com/) (Built in consultation with ISI Kolkata for intervention monitoring)
118 | 
119 | ---
120 | 
121 | - [COVID-19-Tracker](https://aakashmangla2000.github.io/Covid-19-India-Website/)
122 |   - By: [@aakashmangla2000](https://github.com/Aakashmangla2000)
123 | 
124 | ---
125 | 
126 | - [COVID-19 Tracker for India (React App)](https://covid19indiastatus-da4dc.web.app) (State and District wise figures)
127 |   - By: [@mehulsengupta](https://github.com/mehulsengupta/)
128 | 
129 | ---
130 | 
131 | - [R0 Tracker India](https://www.nidhigupta.live/rtcovid) (State and District wise R0 tracking in India)
132 | 
133 | ---
134 | 
135 | - [COVID 19 Tracker - Karnataka](https://kar.covid19-info.website/)
136 |   - By: [@AbhishekPednekar](https://github.com/AbhishekPednekar84)
137 | 
138 | ---
139 | 
140 | - [COVID-19 Tracker | India](https://indiafightscorona.netlify.app/)
141 |   - By: [@vinitshahdeo](https://github.com/vinitshahdeo)
--------------------------------------------------------------------------------
/documentation/rawdata.md:
--------------------------------------------------------------------------------
 1 | # raw_data{n}.json
 2 | 
 3 | ## Description
 4 | 
 5 | raw_data{n}.json represents the rows entered into the Google Sheets by the data ops team. The data present in the raw_data json files are either individual records or district-level records. Some of these district-level records might have the district name empty or set to Unknown. This is due to state bulletins not having enough details about district splits.
 6 | 
 7 | ## Structure
 8 | 
 9 | - `agebracket`
10 |   This provides the age of the case. This is applicable only for rows that represent individual records.
11 | - `backupnotes`
12 |   This was used during the initial days to record additional information. Its usage has been discontinued.
13 | - `contractedfromwhichpatientsuspected`
14 |   This field gives the patient id from whom the current patient is believed to have contracted the virus. This is based on state bulletins. As of July 10th, this field is used only for Karnataka records.
15 | - `currentstatus`
16 |   This can have Hospitalized, Recovered, Deceased or Migrated_Others as values. These represent the status of the case. Migrated_Others is used as a placeholder for those cases that are marked by states as having migrated to other states or having died due to non-covid reasons.
17 | - `dateannounced`
18 |   This field gives the date on which the case(s) were reported by the state/central bulletin.
19 | - `detectedcity`
20 |   This field gives the city where the case(s) were reported.
21 | - `detecteddistrict`
22 |   This field gives the district where the case(s) were reported.
23 | - `detectedstate`
24 |   This field gives the state where the case(s) were reported.
25 | - `estimatedonsetdate`
26 |   This field is not used.
27 | - `gender`
28 |   This provides the gender of the case. This is applicable only for rows that represent individual records.
29 | - `nationality`
30 |   This provides the nationality of the case. This is applicable only for rows that represent individual records.
31 | - `notes`
32 |   This provides any notes regarding the case(s) reported.
33 | - `numcases`
34 |   This field can take any integer value. It denotes the following:
35 |   - If the value == 1 : The row indicates an individual record (age and gender info are subject to state bulletin releases).
36 | - If the value != 1 (greater than or less than 1) : The row indicates a district-level record. 37 | - If the value > 0 and (detecteddistrict == '' or detecteddistrict == 'Unknown') : The row was added without district details because the state bulletin did not provide them at the point of data entry. 38 | - If the value < 0 and (detecteddistrict == '' or detecteddistrict == 'Unknown') : The row was added to adjust a previously added bulk entry as described above. 39 | - If the value < 0 and (detecteddistrict != '' and detecteddistrict != 'Unknown') : The row was added because the state bulletin reported a reduced count for that district/day/category combination. 40 | - If the value == 0 : Ignore the record. This would have been an entry made and later ignored due to clarifications from state bulletins. 41 | - `patientnumber` 42 | - This field used to hold a global patient number for all patients being reported. This has been discontinued with the shift to district-level entries. 43 | - `source1` 44 | - Source for the current row. 45 | - `source2` 46 | - Additional source for the current row. 47 | - `source3` 48 | - Additional source for the current row. 49 | - `statecode` 50 | - A two-letter code to represent the state/UT. 51 | - `statepatientnumber` 52 | - A field to represent the state patient number if the state bulletin provided one. 53 | - `statuschangedate` 54 | - A field to represent the change of status of patients from Hospitalized -> Recovered or Hospitalized -> Deceased. This field has been discontinued with district-level records. 55 | 56 | ## Usage and caveats 57 | 58 | - The raw_data json APIs should be used only when the relevant details are not available through the other state/district APIs. The raw data APIs have evolved over time, and the data present in some of the earlier versions of raw_data json is not completely clean since there was a lot of inconsistency in the reporting by state governments. Please exercise caution while using the raw_data json APIs. 59 | - The numcases column, its significance, and the handling of bulk negatives: 60 | As noted in the description, numcases can take a varied set of values. The most important rows to watch for while dealing with numcases are those with bulk +ve/-ve values and no district value present. These entries represent data that has no district-level information. However, the states sometimes release these details at a later point in time. In such events, a bulk -ve is added to the same state/date/category combination to cancel out the previous bulk entry, and district splits are added to that state/date/category combination. In technical terms: if you were to write an SQL query that sums the numcases column grouped by districtname, statename, dateannounced and currentstatus, you should get proper values for all districts of that state for the days where data is present (see the sketch at the end of this section). The output of such a query might also yield rows with empty or unknown districts; these are the cases for which district details were not announced by the state governments. Examples: DL and TS both have a large number of unknown districts for all three categories. 61 | Some rows might have a district name and a negative value for numcases. These are genuine reductions per state bulletin numbers and should be consumed as is.
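Below is a minimal pandas sketch of the aggregation described above. It is an illustration under assumptions, not a supported client: the endpoint URL pattern (`raw_data{n}.json` on api.covid19india.org, which the README notes now redirects to data.covid19india.org) and the name `per_district` are this example's own choices; the field names follow the JSON keys documented in this file.

```python
import json
import urllib.request

import pandas as pd

# Assumed endpoint pattern for the raw_data{n}.json files documented above.
URL = 'https://api.covid19india.org/raw_data32.json'

with urllib.request.urlopen(URL) as resp:
    records = json.load(resp)['raw_data']

df = pd.DataFrame(records)
df['numcases'] = pd.to_numeric(df['numcases'], errors='coerce').fillna(0)

# Sum (not count) numcases so that bulk -ve adjustment rows cancel out
# the earlier bulk +ve entries they were added to negate.
per_district = (
    df.groupby(['detectedstate', 'detecteddistrict',
                'dateannounced', 'currentstatus'])['numcases']
      .sum()
      .reset_index()
)

# Rows with '' or 'Unknown' in detecteddistrict remain: these are counts
# the state bulletins never broke down by district (e.g. DL, TS).
```

Filtering `per_district` on empty/'Unknown' `detecteddistrict` separates out the counts that were never attributed to a district.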
62 | -------------------------------------------------------------------------------- /src/generate_activity_log.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const moment = require('moment-timezone') 3 | const data = require('../tmp/data.json') 4 | const dataPrev = require('../tmp/data_prev.json') 5 | 6 | const updateLogFile = './tmp/updatelog/log.json' 7 | var updateLog = require('.' + updateLogFile) 8 | 9 | var statewiseNew = data.statewise.reduce((arr, row) => { 10 | arr[row.state] = row 11 | return arr 12 | }, {}) 13 | 14 | var confirmedText 15 | var recoveredText 16 | var deathText 17 | var fullText = '' 18 | var tgFullText = '' 19 | var isChanged = false 20 | 21 | var relStates = {} 22 | 23 | function initRelStates (state) { 24 | if (!relStates[state]) { 25 | relStates[state] = {} 26 | } 27 | } 28 | 29 | dataPrev.statewise.forEach(element => { 30 | if (element.state === 'Total') { 31 | return 32 | } 33 | isChanged = false 34 | confirmedText = null 35 | recoveredText = null 36 | deathText = null 37 | var text = null 38 | if (parseInt(element.confirmed) < parseInt(statewiseNew[element.state].confirmed)) { 39 | var confirmedDiff = statewiseNew[element.state].confirmed - element.confirmed 40 | confirmedText = confirmedDiff + ' new case' + (confirmedDiff === 1 ? '' : 's') 41 | isChanged = true 42 | } 43 | if (parseInt(element.recovered) < parseInt(statewiseNew[element.state].recovered)) { 44 | var recoveredDiff = statewiseNew[element.state].recovered - element.recovered 45 | recoveredText = recoveredDiff + ' recover' + (recoveredDiff === 1 ? 'y' : 'ies') 46 | isChanged = true 47 | } 48 | if (parseInt(element.deaths) < parseInt(statewiseNew[element.state].deaths)) { 49 | var deathDiff = statewiseNew[element.state].deaths - element.deaths 50 | deathText = deathDiff + ' death' + (deathDiff === 1 ? '' : 's') 51 | isChanged = true 52 | } 53 | 54 | if (isChanged) { 55 | text = (confirmedText ? confirmedText + ', ' : '') + (recoveredText ? recoveredText + ', ' : '') + (deathText ? 
deathText + ', ' : '') 56 | var arr = text.split(', ') 57 | if (arr.length > 2) { 58 | arr = text.split(', ') 59 | arr = arr.slice(0, -1) 60 | var arrLast = arr[arr.length - 1] 61 | arr = arr.slice(0, -1) 62 | text = arr.join(', ') 63 | text = text + ' and ' + arrLast 64 | } else { 65 | arr = arr.slice(0, -1) 66 | text = arr.join() 67 | } 68 | text = text + ' in ' + element.state 69 | fullText = fullText + text + '\n' 70 | } 71 | }) 72 | function fillSpace (str, width) { 73 | var empt = Array(width - str.length).join(' ') 74 | return empt + str 75 | } 76 | 77 | const widthState = 2 78 | const widthConfirmed = 15 79 | const widthRecovered = 15 80 | const widthDeceased = 14 81 | 82 | function editMessage (lastUpdated) { 83 | data.statewise.forEach(element => { 84 | var stateCode = element.statecode 85 | if (stateCode === 'TT' || statewiseNew[element.state].confirmed === 0) { 86 | return 87 | } 88 | initRelStates(stateCode) 89 | relStates[stateCode].C = +statewiseNew[element.state].confirmed 90 | relStates[stateCode].Cd = +statewiseNew[element.state].deltaconfirmed 91 | relStates[stateCode].R = +statewiseNew[element.state].recovered 92 | relStates[stateCode].Rd = +statewiseNew[element.state].deltarecovered 93 | relStates[stateCode].D = +statewiseNew[element.state].deaths 94 | relStates[stateCode].Dd = +statewiseNew[element.state].deltadeaths 95 | }) 96 | var words = fillSpace('St', widthState) + 97 | fillSpace('Cnfrmd', widthConfirmed) + 98 | fillSpace('Rcvrd', widthRecovered) + 99 | fillSpace('Dcsd', widthDeceased) + '\n' 100 | 101 | const lengthOfLine = widthState + widthConfirmed + widthRecovered + widthDeceased 102 | words += Array(lengthOfLine).join('-') + '\n' 103 | for (var element in relStates) { 104 | var c = '(' + relStates[element].Cd + ') ' + relStates[element].C 105 | var r = '(' + relStates[element].Rd + ') ' + relStates[element].R 106 | var d = '(' + relStates[element].Dd + ') ' + relStates[element].D 107 | 108 | words += fillSpace(element, widthState) + 109 | fillSpace(c, widthConfirmed) + 110 | fillSpace(r, widthRecovered) + 111 | fillSpace(d, widthDeceased) + '\n' 112 | // console.log(rel_states[element]); 113 | } 114 | var indiaTotal = '*Covid-19 India*\n' 115 | indiaTotal += 'Last updated: _' + lastUpdated + '_\n\n' 116 | 117 | var total = statewiseNew.Total 118 | indiaTotal += '``` Total cases: (↑' + total.deltaconfirmed + ') ' + total.confirmed + 119 | '\n' + ' Recovered : (↑' + total.deltarecovered + ') ' + total.recovered + 120 | '\n' + ' Deaths : (↑' + total.deltadeaths + ') ' + total.deaths + '```' 121 | 122 | // console.log(india_total); 123 | 124 | words = indiaTotal + '\n\n```\n' + words + '```\n\n*www.covid19india.org*' 125 | 126 | console.log(words) 127 | fs.writeFileSync('/tmp/apidata_iutable', words) 128 | } 129 | 130 | if (fullText !== '') { 131 | var total = statewiseNew.Total 132 | tgFullText = fullText + '\n' + 133 | '``` Total cases: (↑' + total.deltaconfirmed + ') ' + total.confirmed + 134 | '\n' + ' Recovered : (↑' + total.deltarecovered + ') ' + total.recovered + 135 | '\n' + ' Deaths : (↑' + total.deltadeaths + ') ' + total.deaths + '```' 136 | 137 | const now = moment().unix() 138 | var entry = {} 139 | entry.update = fullText 140 | entry.timestamp = now 141 | updateLog.push(entry) 142 | updateLog = updateLog.slice(-50) 143 | 144 | fs.writeFileSync(updateLogFile, JSON.stringify(updateLog, null, 2)) 145 | 146 | var date = moment.unix(now) 147 | var formatedTime = date.tz('Asia/Kolkata').format('MMMM DD, hh:mm A') + ' IST' 148 | 
editMessage(formatedTime) 149 | 150 | // console.log(formated_time) 151 | var finalText = '_' + 152 | formatedTime + '_\n\n' + 153 | tgFullText + 154 | '\n\n*www.covid19india.org*' 155 | console.log(finalText) 156 | 157 | fs.writeFileSync('/tmp/apidata_iumessage', finalText) 158 | } else { 159 | console.log('No updates this time!') 160 | } 161 | -------------------------------------------------------------------------------- /src/obsolete/ultimate_parser.js_bak: -------------------------------------------------------------------------------- 1 | console.log('Ultimate parser start') 2 | 3 | const { STATE_CODES_ARRAY, STATE_CODES_REVERSE } = require('../lib/constants') 4 | 5 | const fs = require('fs') 6 | 7 | const { parse, format, formatISO } = require('date-fns') 8 | const { produce } = require('immer') 9 | const ultimateParser = ( 10 | statesDailyResponse, 11 | zonesResponse, 12 | data, 13 | stateDistrictWiseResponse, 14 | stateTestData 15 | ) => { 16 | let ICMR = {} 17 | let prevSamples = 0 18 | 19 | data.tested.map((testObj) => { 20 | ICMR = produce(ICMR, (draftICMR) => { 21 | let timestamp = null 22 | try { 23 | timestamp = format( 24 | parse(testObj.updatetimestamp, 'dd/MM/yyyy HH:mm:ss', new Date()), 25 | 'yyyy-MM-dd' 26 | ) 27 | } catch (error) {} 28 | if (timestamp) { 29 | draftICMR[timestamp] = { 30 | samples: +testObj.totalsamplestested - prevSamples, 31 | source: testObj.source 32 | } 33 | prevSamples = +testObj.totalsamplestested 34 | } 35 | }) 36 | }) 37 | 38 | let tested = { TT: ICMR } 39 | prevSamples = 0 40 | 41 | stateTestData.states_tested_data.map((testObj) => { 42 | tested = produce(tested, (draftState) => { 43 | draftState[STATE_CODES_REVERSE[testObj.state]] = produce( 44 | draftState[STATE_CODES_REVERSE[testObj.state]] || {}, 45 | (draftTest) => { 46 | draftTest[ 47 | format( 48 | parse(testObj.updatedon, 'dd/MM/yyyy', new Date()), 49 | 'yyyy-MM-dd' 50 | ) 51 | ] = { 52 | samples: +testObj.totaltested - prevSamples, 53 | source: testObj.source1 54 | } 55 | prevSamples = +testObj.totaltested 56 | } 57 | ) 58 | }) 59 | }) 60 | 61 | let timeseries = {} 62 | 63 | statesDailyResponse.states_daily.map((dailyObj) => { 64 | timeseries = produce(timeseries, (draftTS) => { 65 | STATE_CODES_ARRAY.map((state) => { 66 | draftTS[state.code] = produce( 67 | draftTS[state.code] || {}, 68 | (draftDate) => { 69 | const date = format( 70 | parse(dailyObj.date, 'dd-LLL-yy', new Date()), 71 | 'yyyy-MM-dd' 72 | ) 73 | let testedDict = null 74 | try { 75 | testedDict = tested[state.code][date] || null 76 | } catch (error) {} 77 | draftDate[date] = produce( 78 | draftDate[date] || { tested: testedDict }, 79 | (draftType) => { 80 | draftType[dailyObj.status.toLowerCase()] = 81 | +dailyObj[state.code.toLowerCase()] 82 | } 83 | ) 84 | } 85 | ) 86 | }) 87 | }) 88 | }) 89 | 90 | let statewise = {} 91 | data.statewise.map((state) => { 92 | statewise = produce(statewise, (draftState) => { 93 | let latestTestObj = null 94 | const latestTestDate = null 95 | try { 96 | const latestTestDate = Object.keys(tested[state.statecode])[ 97 | Object.keys(tested[state.statecode]).length - 1 98 | ] 99 | latestTestObj = tested[state.statecode][latestTestDate] 100 | } catch (error) {} 101 | draftState[state.statecode] = { 102 | total: { 103 | confirmed: +state.confirmed, 104 | recovered: +state.recovered, 105 | deceased: +state.deaths, 106 | tested: produce(latestTestObj || null, (draftTestObj) => { 107 | if (draftTestObj) { 108 | draftTestObj.last_updated = Object.keys( 109 | tested[state.statecode] 110 | 
)[Object.keys(tested[state.statecode]).length - 1] 111 | } 112 | }) 113 | }, 114 | delta: { 115 | confirmed: +state.deltaconfirmed, 116 | recovered: +state.deltarecovered, 117 | deceased: +state.deltadeaths 118 | }, 119 | timeseries: timeseries[state.statecode], 120 | notes: state.statenotes, 121 | last_updated: 122 | formatISO( 123 | parse(state.lastupdatedtime, 'dd/MM/yyyy HH:mm:ss', new Date()) 124 | ).slice(0, 19) + '+05:30' 125 | } 126 | }) 127 | }) 128 | 129 | const zones = zonesResponse.zones 130 | zones.push({ 131 | statecode: 'TT' 132 | }) 133 | let states = {} 134 | 135 | zones.map((zone) => { 136 | states = produce(states, (draftState) => { 137 | draftState[zone.statecode] = produce( 138 | draftState[zone.statecode] || { 139 | timeseries: timeseries[zone.statecode], 140 | districts: {}, 141 | total: statewise[zone.statecode].total, 142 | delta: statewise[zone.statecode].delta, 143 | notes: statewise[zone.statecode].notes, 144 | last_updated: statewise[zone.statecode].last_updated 145 | }, 146 | (draftDistricts) => { 147 | if (zone.statecode === 'TT') { 148 | draftDistricts.districts = null 149 | } else { 150 | draftDistricts.districts[zone.district] = { 151 | delta: { 152 | confirmed: 153 | +stateDistrictWiseResponse[zone.state].districtData[ 154 | zone.district 155 | ].delta.confirmed, 156 | recovered: 157 | +stateDistrictWiseResponse[zone.state].districtData[ 158 | zone.district 159 | ].delta.recovered, 160 | deceased: 161 | +stateDistrictWiseResponse[zone.state].districtData[ 162 | zone.district 163 | ].delta.deceased 164 | }, 165 | total: { 166 | confirmed: 167 | +stateDistrictWiseResponse[zone.state].districtData[ 168 | zone.district 169 | ].confirmed, 170 | recovered: 171 | +stateDistrictWiseResponse[zone.state].districtData[ 172 | zone.district 173 | ].recovered, 174 | deceased: 175 | +stateDistrictWiseResponse[zone.state].districtData[ 176 | zone.district 177 | ].deceased 178 | }, 179 | zone: { 180 | status: zone.zone, 181 | last_updated: format( 182 | parse(zone.lastupdated, 'dd/MM/yyyy', new Date()), 183 | 'yyyy-MM-dd' 184 | ) 185 | }, 186 | notes: 187 | stateDistrictWiseResponse[zone.state].districtData[ 188 | zone.district 189 | ].notes 190 | } 191 | } 192 | } 193 | ) 194 | }) 195 | }) 196 | return states 197 | } 198 | 199 | const data = require('../tmp/data.json') 200 | const stateDistrictWiseResponse = require('../tmp/state_district_wise.json') 201 | const stateTestData = require('../tmp/state_test_data.json') 202 | const statesDailyResponse = require('../tmp/states_daily.json') 203 | const zonesResponse = require('../tmp/zones.json') 204 | 205 | const new_data = ultimateParser( 206 | statesDailyResponse, 207 | zonesResponse, 208 | data, 209 | stateDistrictWiseResponse, 210 | stateTestData 211 | ) 212 | 213 | fs.writeFileSync('./tmp/v2/data.json', JSON.stringify(new_data, null, 2)) 214 | fs.writeFileSync('./tmp/v2/data.min.json', JSON.stringify(new_data, null, 0)) 215 | 216 | console.log('Ultimate parser end') 217 | -------------------------------------------------------------------------------- /lib/constants.js: -------------------------------------------------------------------------------- 1 | const SHEET_V1 = process.env.SHEET_ID_v1 2 | const SHEET_V2 = process.env.SHEET_ID_v2 3 | const SHEET_V3 = process.env.SHEET_ID_v3 4 | const SHEET_V4 = process.env.SHEET_ID_v4 5 | const SHEET_V5 = process.env.SHEET_ID_v5 6 | const SHEET_V6 = process.env.SHEET_ID_v6 7 | const SHEET_V7 = process.env.SHEET_ID_v7 8 | const SHEET_V8 = process.env.SHEET_ID_v8 9 | const SHEET_V9 
= process.env.SHEET_ID_v9 10 | const SHEET_V10 = process.env.SHEET_ID_v10 11 | const SHEET_V11 = process.env.SHEET_ID_v11 12 | const SHEET_V12 = process.env.SHEET_ID_v12 13 | const SHEET_V13 = process.env.SHEET_ID_v13 14 | const SHEET_V14 = process.env.SHEET_ID_v14 15 | const SHEET_V15 = process.env.SHEET_ID_v15 16 | const SHEET_V16 = process.env.SHEET_ID_v16 17 | const SHEET_V17 = process.env.SHEET_ID_v17 18 | const SHEET_V18 = process.env.SHEET_ID_v18 19 | const SHEET_V19 = process.env.SHEET_ID_v19 20 | const SHEET_V20 = process.env.SHEET_ID_v20 21 | const SHEET_V21 = process.env.SHEET_ID_v21 22 | const SHEET_V22 = process.env.SHEET_ID_v22 23 | const SHEET_V23 = process.env.SHEET_ID_v23 24 | const SHEET_V24 = process.env.SHEET_ID_v24 25 | const SHEET_V25 = process.env.SHEET_ID_v25 26 | const SHEET_V26 = process.env.SHEET_ID_v26 27 | const SHEET_V27 = process.env.SHEET_ID_v27 28 | const SHEET_V28 = process.env.SHEET_ID_v28 29 | const SHEET_V29 = process.env.SHEET_ID_v29 30 | const SHEET_V30 = process.env.SHEET_ID_v30 31 | const SHEET_V31 = process.env.SHEET_ID_v31 32 | const SHEET_V32 = process.env.SHEET_ID_v32 33 | 34 | const SHEET = process.env.SHEET_ID_v32 35 | 36 | const SHEET_RESOURCES = process.env.RESOURCES_SHEET_ID 37 | const SHEET_LOCALE = process.env.SHEET_LOCALE 38 | 39 | // Sheet IDs can be obtained here: https://spreadsheets.google.com/feeds/worksheets//private/full 40 | const SHEET_RESOURCES_SHEET = 'otcvog0' 41 | const SHEET_RAW_DATA = 'od6' 42 | const SHEET_STATEWISE_TAB = 'ovd0hzm' 43 | const SHEET_CASES_TIME_SERIES_TAB = 'o6emnqt' 44 | const SHEET_KEY_VALUES_TAB = 'owlnkho' 45 | const SHEET_TESTED_NUMBERS_ICMR_DATA = 'ozg9iqq' 46 | const SHEET_STATEWISE_TESTED_NUMBERS_DATA = 'o81fdow' 47 | const SHEET_FAQ = 'oknbjsw' 48 | const SHEET_NAME_FACTOIDS = 'ooka3he' 49 | const SHEET_TRAVEL_HISTORY = 'opc5w4v' 50 | const SHEET_DATE_WISE_DELTA = 'on2tlaw' 51 | const SHEET_DEATHS_AND_RECOVERIES = 'o3biev0' 52 | const SHEET_SOURCES_LIST = 'obndi9r' 53 | const SHEET_DISTRICT_WISE = 'o3rdj1v' 54 | const SHEET_ZONES = 'oo4bpj4' 55 | const SHEET_STATES_META_DATA = 'o3t26de' 56 | const SHEET_DISTRICTS_META_DATA = 'ocicunx' 57 | const SHEET_DISTRICT_TESTING_DATA = 'o7l1lwr' 58 | const SHEET_TWITTER_QUERIES = 'oidib2z' 59 | const SHEET_COWIN_VACCINE_DATA_STATEWISE = 'ota7ffy' 60 | const SHEET_CROWDSOURCED_RESOURCES_LINKS = 'o3uhpw1' 61 | 62 | const DIR = './tmp/' 63 | 64 | const FILE_RAW_DATA_1 = '/raw_data1.json' 65 | const FILE_RAW_DATA_2 = '/raw_data2.json' 66 | const FILE_RAW_DATA_3 = '/raw_data3.json' 67 | const FILE_RAW_DATA_4 = '/raw_data4.json' 68 | const FILE_RAW_DATA_5 = '/raw_data5.json' 69 | const FILE_RAW_DATA_6 = '/raw_data6.json' 70 | const FILE_RAW_DATA_7 = '/raw_data7.json' 71 | const FILE_RAW_DATA_8 = '/raw_data8.json' 72 | const FILE_RAW_DATA_9 = '/raw_data9.json' 73 | const FILE_RAW_DATA_10 = '/raw_data10.json' 74 | const FILE_RAW_DATA_11 = '/raw_data11.json' 75 | const FILE_RAW_DATA_12 = '/raw_data12.json' 76 | const FILE_RAW_DATA_13 = '/raw_data13.json' 77 | const FILE_RAW_DATA_14 = '/raw_data14.json' 78 | const FILE_RAW_DATA_15 = '/raw_data15.json' 79 | const FILE_RAW_DATA_16 = '/raw_data16.json' 80 | const FILE_RAW_DATA_17 = '/raw_data17.json' 81 | const FILE_RAW_DATA_18 = '/raw_data18.json' 82 | const FILE_RAW_DATA_19 = '/raw_data19.json' 83 | const FILE_RAW_DATA_20 = '/raw_data20.json' 84 | const FILE_RAW_DATA_21 = '/raw_data21.json' 85 | const FILE_RAW_DATA_22 = '/raw_data22.json' 86 | const FILE_RAW_DATA_23 = '/raw_data23.json' 87 | const FILE_RAW_DATA_24 = 
'/raw_data24.json' 88 | const FILE_RAW_DATA_25 = '/raw_data25.json' 89 | const FILE_RAW_DATA_26 = '/raw_data26.json' 90 | const FILE_RAW_DATA_27 = '/raw_data27.json' 91 | const FILE_RAW_DATA_28 = '/raw_data28.json' 92 | const FILE_RAW_DATA_29 = '/raw_data29.json' 93 | const FILE_RAW_DATA_30 = '/raw_data30.json' 94 | const FILE_RAW_DATA_31 = '/raw_data31.json' 95 | const FILE_RAW_DATA_32 = '/raw_data32.json' 96 | const FILE_DEATHS_RECOVERIES_1 = '/deaths_recoveries1.json' 97 | const FILE_DEATHS_RECOVERIES_2 = '/deaths_recoveries2.json' 98 | const FILE_DISTRICTS = '/district_wise.json' 99 | const FILE_DISTRICT_TESTING_DATA = '/district_testing_data.json' 100 | const FILE_TWITTER_QUERIES = '/twitter_queries.json' 101 | const FILE_DATA = '/data.json' 102 | const FILE_FAQ = '/faq.json' 103 | const FILE_WEBSITE_DATA = '/website_data.json' 104 | const FILE_TRAVEL_HISTORY = '/travel_history.json' 105 | const FILE_DATE_WISE_DELTA = '/states_daily.json' 106 | const FILE_STATEWISE_TESTED_DATA = '/state_test_data.json' 107 | const FILE_RESOURCES_ESSENTIALS = '/resources/resources.json' 108 | const FILE_ZONES = '/zones.json' 109 | const FILE_MISC = '/misc.json' 110 | const FILE_COWIN_VACCINE_DATA_STATEWISE = '/cowin_vaccine_data_statewise.json' 111 | const FILE_SOURCES_LIST = '/sources_list.json' 112 | const FILE_CROWDSOURCED_RESOURCES_LINKS = '/crowdsourced_resources_links.json' 113 | 114 | module.exports = { 115 | SHEET_STATES_META_DATA, 116 | SHEET_DISTRICTS_META_DATA, 117 | SHEET, 118 | SHEET_v1: SHEET_V1, 119 | SHEET_v2: SHEET_V2, 120 | SHEET_v3: SHEET_V3, 121 | SHEET_v4: SHEET_V4, 122 | SHEET_v5: SHEET_V5, 123 | SHEET_v6: SHEET_V6, 124 | SHEET_v7: SHEET_V7, 125 | SHEET_v8: SHEET_V8, 126 | SHEET_v9: SHEET_V9, 127 | SHEET_v10: SHEET_V10, 128 | SHEET_v11: SHEET_V11, 129 | SHEET_v12: SHEET_V12, 130 | SHEET_v13: SHEET_V13, 131 | SHEET_v14: SHEET_V14, 132 | SHEET_v15: SHEET_V15, 133 | SHEET_v16: SHEET_V16, 134 | SHEET_v17: SHEET_V17, 135 | SHEET_v18: SHEET_V18, 136 | SHEET_v19: SHEET_V19, 137 | SHEET_v20: SHEET_V20, 138 | SHEET_v21: SHEET_V21, 139 | SHEET_v22: SHEET_V22, 140 | SHEET_v23: SHEET_V23, 141 | SHEET_v24: SHEET_V24, 142 | SHEET_v25: SHEET_V25, 143 | SHEET_v26: SHEET_V26, 144 | SHEET_v27: SHEET_V27, 145 | SHEET_v28: SHEET_V28, 146 | SHEET_v29: SHEET_V29, 147 | SHEET_v30: SHEET_V30, 148 | SHEET_v31: SHEET_V31, 149 | SHEET_v32: SHEET_V32, 150 | SHEET_RESOURCES, 151 | SHEET_LOCALE, 152 | SHEET_RAW_DATA, 153 | SHEET_STATEWISE_TAB, 154 | SHEET_CASES_TIME_SERIES_TAB, 155 | SHEET_KEY_VALUES_TAB, 156 | SHEET_Tested_Numbers_ICMR_Data: SHEET_TESTED_NUMBERS_ICMR_DATA, 157 | SHEET_FAQ, 158 | SHEET_NAME_FACTOIDS, 159 | SHEET_TRAVEL_HISTORY, 160 | SHEET_DATE_WISE_DELTA, 161 | SHEET_StateWise_Tested_Numbers_Data: SHEET_STATEWISE_TESTED_NUMBERS_DATA, 162 | SHEET_DEATHS_AND_RECOVERIES, 163 | SHEET_SOURCES_LIST, 164 | SHEET_DISTRICT_WISE, 165 | SHEET_ZONES, 166 | SHEET_DISTRICT_TESTING_DATA, 167 | SHEET_TWITTER_QUERIES, 168 | SHEET_COWIN_VACCINE_DATA_STATEWISE, 169 | SHEET_CROWDSOURCED_RESOURCES_LINKS, 170 | DIR, 171 | // FILE_RAW_DATA, 172 | FILE_RAW_DATA_1, 173 | FILE_RAW_DATA_2, 174 | FILE_RAW_DATA_3, 175 | FILE_RAW_DATA_4, 176 | FILE_RAW_DATA_5, 177 | FILE_RAW_DATA_6, 178 | FILE_RAW_DATA_7, 179 | FILE_RAW_DATA_8, 180 | FILE_RAW_DATA_9, 181 | FILE_RAW_DATA_10, 182 | FILE_RAW_DATA_11, 183 | FILE_RAW_DATA_12, 184 | FILE_RAW_DATA_13, 185 | FILE_RAW_DATA_14, 186 | FILE_RAW_DATA_15, 187 | FILE_RAW_DATA_16, 188 | FILE_RAW_DATA_17, 189 | FILE_RAW_DATA_18, 190 | FILE_RAW_DATA_19, 191 | FILE_RAW_DATA_20, 192 | 
FILE_RAW_DATA_21, 193 | FILE_RAW_DATA_22, 194 | FILE_RAW_DATA_23, 195 | FILE_RAW_DATA_24, 196 | FILE_RAW_DATA_25, 197 | FILE_RAW_DATA_26, 198 | FILE_RAW_DATA_27, 199 | FILE_RAW_DATA_28, 200 | FILE_RAW_DATA_29, 201 | FILE_RAW_DATA_30, 202 | FILE_RAW_DATA_31, 203 | FILE_RAW_DATA_32, 204 | // FILE_DEATHS_RECOVERIES, 205 | FILE_DEATHS_RECOVERIES_1, 206 | FILE_DEATHS_RECOVERIES_2, 207 | SHEET_RESOURCES_SHEET, 208 | FILE_DATA, 209 | FILE_FAQ, 210 | FILE_WEBSITE_DATA, 211 | FILE_TRAVEL_HISTORY, 212 | FILE_DATE_WISE_DELTA, 213 | FILE_STATEWISE_TESTED_DATA, 214 | FILE_RESOURCES_ESSENTIALS, 215 | FILE_SOURCES_LIST, 216 | FILE_DISTRICTS, 217 | FILE_ZONES, 218 | FILE_MISC, 219 | FILE_DISTRICT_TESTING_DATA, 220 | FILE_TWITTER_QUERIES, 221 | FILE_COWIN_VACCINE_DATA_STATEWISE, 222 | FILE_CROWDSOURCED_RESOURCES_LINKS 223 | } 224 | -------------------------------------------------------------------------------- /src/sheet-to-json_generic.js: -------------------------------------------------------------------------------- 1 | const { task, fetchData, writeData } = require('../lib') 2 | const c = require('../lib/constants'); 3 | 4 | (async function main() { 5 | console.log('Running task on start...') 6 | // await task({ 7 | // sheet: c.SHEET_LOCALE, 8 | // tabs: { locales: 'od6' }, 9 | // file: '/locales.json' 10 | // }) 11 | 12 | // await task({ 13 | // sheet: c.SHEET_LOCALE, 14 | // tabs: { locales_progress: 'ou6ga5q' }, 15 | // file: '/locales_progress.json' 16 | // }) 17 | 18 | // uncomment below if v1 sheet has updates 19 | // await task({ 20 | // sheet: c.SHEET_v1, 21 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 22 | // file: c.FILE_RAW_DATA_1 23 | // }) 24 | // await task({ 25 | // sheet: c.SHEET_v2, 26 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 27 | // file: c.FILE_RAW_DATA_2 28 | // }) 29 | // await task({ 30 | // sheet: c.SHEET_v3, 31 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 32 | // file: c.FILE_RAW_DATA_3 33 | // }) 34 | 35 | // await task({ 36 | // sheet: c.SHEET_v4, 37 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 38 | // file: c.FILE_RAW_DATA_4 39 | // }) 40 | 41 | // await task({ 42 | // sheet: c.SHEET_v5, 43 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 44 | // file: c.FILE_RAW_DATA_5 45 | // }) 46 | 47 | // await task({ 48 | // sheet: c.SHEET_v6, 49 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 50 | // file: c.FILE_RAW_DATA_6 51 | // }) 52 | 53 | // await task({ 54 | // sheet: c.SHEET_v7, 55 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 56 | // file: c.FILE_RAW_DATA_7 57 | // }) 58 | 59 | // await task({ 60 | // sheet: c.SHEET_v8, 61 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 62 | // file: c.FILE_RAW_DATA_8 63 | // }) 64 | 65 | // await task({ 66 | // sheet: c.SHEET_v9, 67 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 68 | // file: c.FILE_RAW_DATA_9 69 | // }) 70 | 71 | // await task({ 72 | // sheet: c.SHEET_v10, 73 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 74 | // file: c.FILE_RAW_DATA_10 75 | // }) 76 | 77 | // await task({ 78 | // sheet: c.SHEET_v11, 79 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 80 | // file: c.FILE_RAW_DATA_11 81 | // }) 82 | 83 | // await task({ 84 | // sheet: c.SHEET_v12, 85 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 86 | // file: c.FILE_RAW_DATA_12 87 | // }) 88 | 89 | // await task({ 90 | // sheet: c.SHEET_v13, 91 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 92 | // file: c.FILE_RAW_DATA_13 93 | // }) 94 | 95 | // await task({ 96 | // sheet: c.SHEET_v14, 97 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 98 | // file: c.FILE_RAW_DATA_14 99 | // }) 100 | 101 | // await task({ 102 | // 
sheet: c.SHEET_v15, 103 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 104 | // file: c.FILE_RAW_DATA_15 105 | // }) 106 | 107 | // await task({ 108 | // sheet: c.SHEET_v16, 109 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 110 | // file: c.FILE_RAW_DATA_16 111 | // }) 112 | 113 | // await task({ 114 | // sheet: c.SHEET_v17, 115 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 116 | // file: c.FILE_RAW_DATA_17 117 | // }) 118 | 119 | // await task({ 120 | // sheet: c.SHEET_v18, 121 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 122 | // file: c.FILE_RAW_DATA_18 123 | // }) 124 | 125 | // await task({ 126 | // sheet: c.SHEET_v19, 127 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 128 | // file: c.FILE_RAW_DATA_19 129 | // }) 130 | 131 | // await task({ 132 | // sheet: c.SHEET_v20, 133 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 134 | // file: c.FILE_RAW_DATA_20 135 | // }) 136 | 137 | // await task({ 138 | // sheet: c.SHEET_v21, 139 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 140 | // file: c.FILE_RAW_DATA_21 141 | // }) 142 | 143 | // await task({ 144 | // sheet: c.SHEET_v22, 145 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 146 | // file: c.FILE_RAW_DATA_22 147 | // }) 148 | 149 | // await task({ 150 | // sheet: c.SHEET_v23, 151 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 152 | // file: c.FILE_RAW_DATA_23 153 | // }) 154 | 155 | // await task({ 156 | // sheet: c.SHEET_v24, 157 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 158 | // file: c.FILE_RAW_DATA_24 159 | // }) 160 | 161 | // await task({ 162 | // sheet: c.SHEET_v25, 163 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 164 | // file: c.FILE_RAW_DATA_25 165 | // }) 166 | 167 | // await task({ 168 | // sheet: c.SHEET_v26, 169 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 170 | // file: c.FILE_RAW_DATA_26 171 | // }) 172 | 173 | // await task({ 174 | // sheet: c.SHEET_v27, 175 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 176 | // file: c.FILE_RAW_DATA_27 177 | // }) 178 | 179 | // await task({ 180 | // sheet: c.SHEET_v28, 181 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 182 | // file: c.FILE_RAW_DATA_28 183 | // }) 184 | 185 | // await task({ 186 | // sheet: c.SHEET_v29, 187 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 188 | // file: c.FILE_RAW_DATA_29 189 | // }) 190 | 191 | // await task({ 192 | // sheet: c.SHEET_v30, 193 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 194 | // file: c.FILE_RAW_DATA_30 195 | // }) 196 | 197 | // await task({ 198 | // sheet: c.SHEET_v31, 199 | // tabs: { raw_data: c.SHEET_RAW_DATA }, 200 | // file: c.FILE_RAW_DATA_31 201 | // }) 202 | 203 | await task({ 204 | sheet: c.SHEET_v32, 205 | tabs: { raw_data: c.SHEET_RAW_DATA }, 206 | file: c.FILE_RAW_DATA_32 207 | }) 208 | 209 | // uncomment below if v1 sheet has updates 210 | // await task({ 211 | // sheet: c.SHEET_v1, 212 | // tabs: { deaths_recoveries: c.SHEET_DEATHS_AND_RECOVERIES }, 213 | // file: c.FILE_DEATHS_RECOVERIES_1 214 | // }) 215 | 216 | // await task({ 217 | // sheet: c.SHEET_v2, 218 | // tabs: { deaths_recoveries: c.SHEET_DEATHS_AND_RECOVERIES }, 219 | // file: c.FILE_DEATHS_RECOVERIES_2 220 | // }) 221 | 222 | // await task({ 223 | // sheet: c.SHEET, 224 | // tabs: { 225 | // state_meta_data: c.SHEET_STATES_META_DATA, 226 | // district_meta_data: c.SHEET_DISTRICTS_META_DATA 227 | // }, 228 | // file: c.FILE_MISC 229 | // }) 230 | 231 | // await task({ 232 | // sheet: c.SHEET, 233 | // tabs: { 234 | // districts: c.SHEET_DISTRICT_WISE 235 | // }, 236 | // file: c.FILE_DISTRICTS 237 | // }) 238 | 239 | // await task({ 240 | // sheet: c.SHEET, 241 | // tabs: { 242 | // travel_history: 
c.SHEET_TRAVEL_HISTORY 243 | // }, 244 | // file: c.FILE_TRAVEL_HISTORY 245 | // }); 246 | 247 | // await task({ 248 | // sheet: c.SHEET, 249 | // tabs: { 250 | // factoids: c.SHEET_NAME_FACTOIDS, faq: c.SHEET_FAQ 251 | // }, 252 | // file: c.FILE_WEBSITE_DATA 253 | // }) 254 | 255 | // need to remove objects with empty states or empty totaltested 256 | var data = await fetchData({ 257 | sheet: c.SHEET, 258 | tabs: { 259 | states_tested_data: c.SHEET_StateWise_Tested_Numbers_Data 260 | } 261 | }) 262 | data.states_tested_data = data.states_tested_data.filter(function (item) { 263 | // drop rows missing either field; filtering avoids the skipped-element 264 | // pitfall of splicing inside forEach 265 | return Boolean(item.totaltested && item.state) 266 | }) 267 | await writeData({ file: c.FILE_STATEWISE_TESTED_DATA, data }) 268 | 269 | await task({ 270 | sheet: c.SHEET, 271 | tabs: { states_daily: c.SHEET_DATE_WISE_DELTA }, 272 | file: c.FILE_DATE_WISE_DELTA 273 | }) 274 | 275 | await task({ 276 | sheet: c.SHEET, 277 | tabs: { 278 | statewise: c.SHEET_STATEWISE_TAB, 279 | cases_time_series: c.SHEET_CASES_TIME_SERIES_TAB, 280 | tested: c.SHEET_Tested_Numbers_ICMR_Data 281 | }, 282 | file: c.FILE_DATA 283 | }) 284 | 285 | await task({ 286 | sheet: c.SHEET, 287 | tabs: { sources_list: c.SHEET_SOURCES_LIST }, 288 | file: c.FILE_SOURCES_LIST 289 | }) 290 | 291 | await task({ 292 | sheet: c.SHEET_RESOURCES, 293 | tabs: { resources: c.SHEET_RESOURCES_SHEET }, 294 | file: c.FILE_RESOURCES_ESSENTIALS 295 | }) 296 | 297 | await task({ 298 | sheet: c.SHEET, 299 | tabs: { zones: c.SHEET_ZONES }, 300 | file: c.FILE_ZONES 301 | }) 302 | 303 | await task({ 304 | sheet: c.SHEET, 305 | tabs: { district_testing_data: c.SHEET_DISTRICT_TESTING_DATA }, 306 | file: c.FILE_DISTRICT_TESTING_DATA 307 | }) 308 | 309 | await task({ 310 | sheet: c.SHEET, 311 | tabs: { twitter_queries: c.SHEET_TWITTER_QUERIES }, 312 | file: c.FILE_TWITTER_QUERIES 313 | }) 314 | 315 | await task({ 316 | sheet: c.SHEET, 317 | tabs: { crowdsourced_resources_links: c.SHEET_CROWDSOURCED_RESOURCES_LINKS }, 318 | file: c.FILE_CROWDSOURCED_RESOURCES_LINKS 319 | }) 320 | 321 | console.log('End of sheet-to-json_generic') 322 | })() 323 | -------------------------------------------------------------------------------- /documentation/csv/index.md: -------------------------------------------------------------------------------- 1 | # COVID19-India API 2 | 3 | ## Announcement 4 | 5 | We are replacing the earlier vaccine doses sheet with a v2 version to accommodate the breakup of doses administered in the data provided by MoHFW. We will end support for the earlier one by July 10. 6 | 7 | We have stopped capturing testing data at a district level. Please check the status of the API endpoints below. 8 | ## CSV 9 | 10 | Sometimes, having files in a spreadsheet format is more useful for analysts and scientists. We have provided the files as downloadable CSV files below. 11 | 12 | ### Files available 13 | 14 | Latest data from the google sheet (10-20 minutes delayed) is available through the `latest` end-point, as in the sketch below.
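For example, any sheet listed in the tables that follow can be pulled straight into pandas; the URL pattern below is the one used by `src/build_raw_data.py` in this repo, with the sheet name substituted in:

```python
import pandas as pd

# URL pattern as used by src/build_raw_data.py; substitute any sheet
# name listed below (e.g. state_wise, case_time_series).
url = 'https://api.covid19india.org/csv/latest/raw_data1.csv'
df = pd.read_csv(url)
print(df.shape)
```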
15 | These are the files available: 16 | 17 | #### Raw Data 18 | 19 | | Status | Sheet Name | Link to CSV | Description | 20 | | ------------- | ---------- | -------------------------------------------------------- | ---------------------- | 21 | | :green_heart: | raw_data1 | | Till Apr 19th | 22 | | :green_heart: | raw_data2 | | Apr 20th to Apr 26th | 23 | | :green_heart: | raw_data3 | | Apr 27th to May 9th | 24 | | :green_heart: | raw_data4 | | May 10th to May 23rd | 25 | | :green_heart: | raw_data5 | | May 24th to Jun 4th | 26 | | :green_heart: | raw_data6 | | Jun 05th to Jun 19th | 27 | | :green_heart: | raw_data7 | | Jun 20th to Jun 30th | 28 | | :green_heart: | raw_data8 | | Jul 01st to Jul 7th | 29 | | :green_heart: | raw_data9 | | Jul 08th to Jul 13th | 30 | | :green_heart: | raw_data10 | | Jul 14th to Jul 17th | 31 | | :green_heart: | raw_data11 | | Jul 18th to Jul 22nd | 32 | | :green_heart: | raw_data12 | | Jul 23rd to Aug 06th | 33 | | :green_heart: | raw_data13 | | Aug 07th to Aug 21st | 34 | | :green_heart: | raw_data14 | | Aug 22nd to Sep 05th | 35 | | :green_heart: | raw_data15 | | Sep 06th to Sep 21st | 36 | | :green_heart: | raw_data16 | | Sep 22nd to Oct 08th | 37 | | :green_heart: | raw_data17 | | Oct 09th to Oct 26th | 38 | | :green_heart: | raw_data18 | | Oct 27th to Nov 12th | 39 | | :green_heart: | raw_data19 | | Nov 13th to Nov 30th | 40 | | :green_heart: | raw_data20 | | Dec 01st to Dec 19th | 41 | | :green_heart: | raw_data21 | | Dec 20th to Jan 08th | 42 | | :green_heart: | raw_data22 | | Jan 09th to Jan 31st | 43 | | :green_heart: | raw_data23 | | Feb 01st to Feb 27th | 44 | | :green_heart: | raw_data24 | | Feb 28th to Mar 31st | 45 | | :green_heart: | raw_data25 | | Apr 01st to Apr 20th | 46 | | :green_heart: | raw_data26 | | Apr 21st to May 04th | 47 | | :green_heart: | raw_data27 | | May 05th to May 17th | 48 | | :green_heart: | raw_data28 | | May 18th to Jun 02nd | 49 | | :green_heart: | raw_data29 | | Jun 03rd to Jun 19th | 50 | | :green_heart: | raw_data30 | | Jun 20th to Jul 06th | 51 | | :green_heart: | raw_data31 | | Jul 07th to Jul 27th | 52 | | :green_heart: | raw_data32 | | Jul 28th onwards | 53 | 54 | #### Other Sheets 55 | 56 | | Status | Sheet Name | Link to CSV | Description | 57 | | ------------- | ----------------------------- | --------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | 58 | | :green_heart: | case_time_series | | Time series of Confirmed, Recovered and Deceased cases in India | 59 | | :green_heart: | state_wise | | The latest State-wise situation | 60 | | :green_heart: | district_wise | | The latest District-wise situation | 61 | | :green_heart: | state_wise_daily | | Statewise timeseries of Confirmed, Recovered and Deceased numbers | 62 | | :green_heart: | states | | Statewise timeseries of Confirmed, Recovered and Deceased numbers in long format | 63 | | :green_heart: | districts | | Districtwise timeseries of Confirmed, Recovered and Deceased numbers in long format | 64 | | :green_heart: | statewise_tested_numbers_data | | Number of tests conducted in each state, plus ventilators and hospital bed occupancy reported in state bulletins | 65 | | :green_heart: | tested_numbers_icmr_data | | Number of tests reported by ICMR | 66 | | :green_heart: | icmr_labs_statewise | | Number of Labs in each state as per ICMR | 67 | | :green_heart: | sources_list | | List of sources that we are using.
| 68 | | :green_heart: | rtpcr_samples_collected | | Number of RTPCR samples collected statewise in ICMR Application | 69 | | :green_heart: | vaccine_doses_administered_statewise | | Number of vaccine doses administered statewise (support will end by July 10; use the v2 sheet below for more features) | 70 | | :green_heart: | vaccine_doses_administered_statewise | | Number of vaccine doses administered statewise by MoHFW (replaces the above sheet, in a format that accommodates the breakup of doses) | 71 | | :green_heart: | cowin_vaccine_data_statewise | | Key data points from CoWin database at a state level | 72 | | :green_heart: | cowin_vaccine_data_districtwise | | Key data points from CoWin database at a district level | 73 | 74 | #### Note 75 | 76 | - Use raw data files only if you need to analyze demographics or notes at a patient level 77 | - Always try to use the aggregated numbers above as they have been treated for discrepancies 78 | 79 | #### Contributing 80 | 81 | - If you notice issues, have questions or want to suggest enhancements, please raise an issue in the repo. 82 | 83 | #### Quick Links 84 | 85 | A more detailed note on the columns present in the data may be found in the json documentation 86 | 87 | - [Documentation](https://api.covid19india.org/documentation) 88 | -------------------------------------------------------------------------------- /src/sheets-to-csv.js: -------------------------------------------------------------------------------- 1 | const fetch = require('node-fetch') 2 | var fs = require('fs') 3 | 4 | var dir = './tmp/csv/' 5 | if (!fs.existsSync(dir)) { 6 | fs.mkdirSync(dir, { recursive: true }) 7 | } 8 | 9 | var latestDir = dir + 'latest' 10 | 11 | if (!fs.existsSync(latestDir)) { 12 | fs.mkdirSync(latestDir) 13 | } 14 | 15 | // Published sheets 16 | const PUBLISHED_SHEET_ID_1 = '2PACX-1vSz8Qs1gE_IYpzlkFkCXGcL_BqR8hZieWVi-rphN1gfrO3H4lDtVZs4kd0C3P8Y9lhsT1rhoB-Q_cP4' 17 | const PUBLISHED_SHEET_ID_2 = '2PACX-1vRodtoTyQwXckfuvuQllkMhGC_gruigaaizVc8I6-BZWeetYpmRyexnO75ep7rnSxFICd8c9dfpwU8I' 18 | const PUBLISHED_SHEET_ID_3 = '2PACX-1vR_17UovavD4X7m_pqzmXjA_kCjGxIapemdWpRhDELHR1LbLJ-EVbxjKgeQat489BFRZ9bqMf-ILe_H' 19 | const PUBLISHED_SHEET_ID_4 = '2PACX-1vSeAoAk_iMv7cQ0tldZC7aivJmGKM5Wpc5VVr37Nzv-geTmtr6pDMb-oDK59RS21Om80-SYR3jRp6qq' 20 | const PUBLISHED_SHEET_ID_5 = '2PACX-1vSEikAgjAB9x7yhx4zNOUGLIx8Zfy2mAzRv0K1tbw08g73MO88-bbWCsgmhJ0uXa0gtuUlLMOnE9h26' 21 | const PUBLISHED_SHEET_ID_6 = '2PACX-1vQQmgjCktQknnTPy-s4OFycu-imtoMCrWY5M2Lqig3nhGyy6W5E27xbCyaaKV9lGaDWmTzGWVzPH9-S' 22 | const PUBLISHED_SHEET_ID_7 = '2PACX-1vR6blqV85tiBO-9u4MCW72qXALS3f7yQD0iV47MbsmIcKrvBDTorIVrUJ96QrxUj7iwAviYiecjp8VU' 23 | const PUBLISHED_SHEET_ID_8 = '2PACX-1vR1zl3JStozuCgPsPol19f9k_io1ABmHS_mOl9gzWxiDd2_WvWhdfhePXBFZIUFjpW-gPfPwE9m7AA_' 24 | const PUBLISHED_SHEET_ID_9 = '2PACX-1vRb4AsEPrV4b0S4j2vQku-J5XHnh8c_8fzmIhD2S2aMc2if7g6bLwJNYOPV8UmrrNR-Bv0C0yjcUnU3' 25 | const PUBLISHED_SHEET_ID_10 = '2PACX-1vQyBRow24Pc7Wm_mSjU3JDy_Ua5mFByz6zE7-vFguBvUOdcr-90PgNcTBOCL-nTa40WrghiAN-kSFVX' 26 | const PUBLISHED_SHEET_ID_11 = '2PACX-1vTd_tTI33CBI4obGaKTo0dfw1cNu5dUz4OIIhdbWJVmZJlEVslMyWzky1ifb9uRmV0siVxneBW4iBwi' 27 | const PUBLISHED_SHEET_ID_12 = '2PACX-1vRCvn9X8LdOLYpiq_8U8Ihw8m_q0Lrl0Gkx4kJ22dhxX9Biy-Bhc0KWWxFQ9Fk2oS5pjgPNEd4I4XHD' 28 | const PUBLISHED_SHEET_ID_13 = '2PACX-1vT6RKqvY0VzMaN7pKyYPyVXvUYR5cu3L5Z0sTeayDRE72xCXqVU-rhgyAucjGMJDDG5rXRKInPChqrJ' 29 | const PUBLISHED_SHEET_ID_14 = 
'2PACX-1vQujsobkf1XNHg60LutKI8SXXITPGEtSx7F2sR0rBIm_FnFqXKfhz1MnZ1hIAyVAyhbPXbaf5NLXG-Q' 30 | const PUBLISHED_SHEET_ID_15 = '2PACX-1vTsiPxkxMFJWmQQegSkpZgf3dNLqY7gc4msrnCbARgdNrr0wa9dbEDmtW9OzCrGeAmLDL1idbxU_gUk' 31 | const PUBLISHED_SHEET_ID_16 = '2PACX-1vTDdjG51mUgQXFlBigDAF5QTpA9YL9XbhVZzjKSMqcsrD3dx9LeJfGdyBabsReECgyazhCNd3YOHQOa' 32 | const PUBLISHED_SHEET_ID_17 = '2PACX-1vTWUT8wCTjJvROykckn6C30jNt2YVqS6zWyxKs4t0YtKfNAzJ7hxh7OggnZ3RjRokxYqSgvYEON9icz' 33 | const PUBLISHED_SHEET_ID_18 = '2PACX-1vSDMdBWod4Db0eiA052qK1mtgzAAT7JnUeend__jvvPVKeo9Bhp8ur1Z4D41yiq8aGiAZkjsWn1KnZe' 34 | const PUBLISHED_SHEET_ID_19 = '2PACX-1vQRxJ4zOl__ylTmlS5AnGKELJINLUElsRvJQp8YQBK7iPsjRzl1ApUwtgZJWeRYeQvlP2CCCeDuuFV2' 35 | const PUBLISHED_SHEET_ID_20 = '2PACX-1vSeQYg9K6w1B9sfUAMfviYLh8r6n6uk9Bf4g7miDotpeV9j8bC_mZvco0Xr3J-zgv3aj7Xm8mc7GMAJ' 36 | const PUBLISHED_SHEET_ID_21 = '2PACX-1vQT9ukpSK16ubwCFSSsZQHVlggAElw9999Rbtdsg9Opk_LyOCUDbMsAW5oiWxLjG4s0IudlLmhCVZuv' 37 | const PUBLISHED_SHEET_ID_22 = '2PACX-1vTwNTdGXrAa-RHrvb-bE51qE2UMEVO9tb45o_W_djHESofOI5purP0hq4VlBl6Qr1u3F4uR8N7a0nmE' 38 | const PUBLISHED_SHEET_ID_23 = '2PACX-1vRtezerCCxfVaYMerTEyPITsoDO_dx5UpTNcsU6YE8tTsmCflpHmx_0NZDKgPu47wB0_2vfIuxXopnF' 39 | const PUBLISHED_SHEET_ID_24 = '2PACX-1vTVvDzqqG3GF1hyvQ9AAq7SXFr0FizVordgnL9kVAionTzwrgIM3pvMY6o1K8EoMnU1oiIJYZ2Ju6V_' 40 | const PUBLISHED_SHEET_ID_25 = '2PACX-1vSJL0LHDI8dBKtTM3Q3mozy4AW3GUfunNSkkrdrftX3CgjQ3JyrxkqBU_6sr44T2wkZR6NMq2vp1UAf' 41 | const PUBLISHED_SHEET_ID_26 = '2PACX-1vQGs1RLLszaHsZa59xaA-z-de1dZR0mBEX2K3GcJ_OSlE0zI05oTw22-nFcIUypcxpSKhEJaFG0I9SJ' 42 | const PUBLISHED_SHEET_ID_27 = '2PACX-1vT5IUH2r83DCLWxBkfuiQOmYTi0Re41KYh2H7tq4_J-FPm82S7eDOeAmfY3rSfTyNysHMNL88EctL-N' 43 | const PUBLISHED_SHEET_ID_28 = '2PACX-1vT2QMw9OyjcazyGXuHMj-jQ81oMnDDITU3q7fOLpcZ2-W81sNilWB0GdC5v2yv-0e0aMZnr-ZJAFowz' 44 | const PUBLISHED_SHEET_ID_29 = '2PACX-1vTu2gcDkpfLS6g2_eYUtLBtQ3AaqC2UaSEHA20eCkgM41np7BZn0tE-mQ1ojcpr6t9R33WlTV4YHfve' 45 | const PUBLISHED_SHEET_ID_30 = '2PACX-1vSH9k7EkLLVbnIk-whp4A9mx6WDoTQRVDVZGsHU812eHn65Uj08oL7i4kCaE2Mv01SLa1FuaHD35Nf4' 46 | const PUBLISHED_SHEET_ID_31 = '2PACX-1vRboZ3VJ6RTk0md4CI9cwk-K2QwCkCTsnvMEa4rnK1uSd0WubZgo5hnfReaD2EZ0ZpQHzkRuckJrsHk' 47 | const PUBLISHED_SHEET_ID_32 = '2PACX-1vTt1y5sQRZfLp0OjGSnDL59oNjDPsEKnrEIlSfpN491LM-f1dtoQGHfNwHFZSkW0-WShT6nWfGpdQzn' 48 | 49 | 50 | const SHEETS_V1 = [ 51 | ['raw_data1', '0'], 52 | ['death_and_recovered1', '200733542'] 53 | ] 54 | 55 | const SHEETS_V2 = [ 56 | ['raw_data2', '0'], 57 | ['death_and_recovered2', '200733542'] 58 | ] 59 | 60 | const SHEETS_V3 = [ 61 | ['raw_data3', '0'], 62 | ['districts_26apr_gospel', '1964493192'] 63 | ] 64 | 65 | const SHEETS_V4 = [ 66 | ['raw_data4', '0'] 67 | ] 68 | 69 | const SHEETS_V5 = [ 70 | ['raw_data5', '0'] 71 | ] 72 | 73 | const SHEETS_V6 = [ 74 | ['raw_data6', '0'] 75 | ] 76 | 77 | const SHEETS_V7 = [ 78 | ['raw_data7', '0'] 79 | ] 80 | 81 | const SHEETS_V8 = [ 82 | ['raw_data8', '0'] 83 | ] 84 | 85 | const SHEETS_V9 = [ 86 | ['raw_data9', '0'] 87 | ] 88 | 89 | const SHEETS_V10 = [ 90 | ['raw_data10', '0'] 91 | ] 92 | 93 | const SHEETS_V11 = [ 94 | ['raw_data11', '0'] 95 | ] 96 | 97 | const SHEETS_V12 = [ 98 | ['raw_data12', '0'] 99 | ] 100 | 101 | const SHEETS_V13 = [ 102 | ['raw_data13', '0'] 103 | ] 104 | 105 | const SHEETS_V14 = [ 106 | ['raw_data14', '0'] 107 | ] 108 | 109 | const SHEETS_V15 = [ 110 | ['raw_data15', '0'] 111 | ] 112 | 113 | const SHEETS_V16 = [ 114 | ['raw_data16', '0'] 115 | ] 116 | 117 | const SHEETS_V17 = [ 118 | ['raw_data17', '0'] 119 | ] 120 | 121 | const 
SHEETS_V18 = [ 122 | ['raw_data18', '0'] 123 | ] 124 | 125 | const SHEETS_V19 = [ 126 | ['raw_data19', '0'] 127 | ] 128 | 129 | const SHEETS_V20 = [ 130 | ['raw_data20', '0'] 131 | ] 132 | 133 | const SHEETS_V21 = [ 134 | ['raw_data21', '0'] 135 | ] 136 | 137 | const SHEETS_V22 = [ 138 | ['raw_data22', '0'] 139 | ] 140 | 141 | const SHEETS_V23 = [ 142 | ['raw_data23', '0'] 143 | ] 144 | 145 | const SHEETS_V24 = [ 146 | ['raw_data24', '0'] 147 | ] 148 | 149 | const SHEETS_V25 = [ 150 | ['raw_data25', '0'] 151 | ] 152 | 153 | const SHEETS_V26 = [ 154 | ['raw_data26', '0'] 155 | ] 156 | 157 | const SHEETS_V27 = [ 158 | ['raw_data27', '0'] 159 | ] 160 | 161 | const SHEETS_V28 = [ 162 | ['raw_data28', '0'] 163 | ] 164 | 165 | const SHEETS_V29 = [ 166 | ['raw_data29', '0'] 167 | ] 168 | 169 | const SHEETS_V30 = [ 170 | ['raw_data30', '0'] 171 | ] 172 | 173 | const SHEETS_V31 = [ 174 | ['raw_data31', '0'] 175 | ] 176 | 177 | const SHEETS_V32 = [ 178 | ['raw_data32', '0'], 179 | ['state_wise', '1896310216'], 180 | ['state_wise_daily', '1395461826'], 181 | ['sources_list', '704389477'], 182 | ['district_wise', '227379561'], 183 | ['statewise_tested_numbers_data', '486127050'], 184 | ['case_time_series', '387368559'], 185 | ['tested_numbers_icmr_data', '2143634168'], 186 | // ["travel_history", "1532084277"], 187 | ['district_list', '1207378023'], 188 | // ['district_testing', '458610673'], 189 | // ['icmr_labs_statewise', '847799380'], 190 | // ['icmr_rtpcr_tests_daily', '1032515506'], 191 | // ['vaccine_doses_statewise', '1601004575'], 192 | ['cowin_vaccine_data_statewise', '1770661428'], 193 | ['cowin_vaccine_data_districtwise', '382746758'], 194 | ['vaccine_doses_statewise_v2', '1097927328'] 195 | ] 196 | 197 | async function sheetsToCSV (sheets, pubId) { 198 | for (var element of sheets) { 199 | console.log('Reading: ' + element[0]) 200 | var tempUrl = 'https://docs.google.com/spreadsheets/d/e/' + pubId + '/pub?gid=' + element[1] + '&single=false&output=csv' 201 | console.log(tempUrl) 202 | var url = encodeURI(tempUrl) 203 | const settings = { method: 'Get' } 204 | await fetch(url, settings).then(res => res.text()) 205 | .then(csv => { 206 | if (csv.includes('<!DOCTYPE html')) { // failed fetches come back as an HTML error page, not CSV 207 | console.error('probably not csv!') 208 | process.exit(1) 209 | } else { 210 | // fs.writeFileSync(today_dir + "/" + element[0] + ".csv", csv); 211 | fs.writeFileSync(latestDir + '/' + element[0] + '.csv', csv) 212 | console.log('Write completed: ' + element[0]) 213 | } 214 | }) 215 | }; 216 | } 217 | 218 | (async function main () { 219 | // uncomment below and run when changes in v1 sheet 220 | // await sheetsToCSV(SHEETS_V1, PUBLISHED_SHEET_ID_1) 221 | // await sheetsToCSV(SHEETS_V2, PUBLISHED_SHEET_ID_2) 222 | // await sheetsToCSV(SHEETS_V3, PUBLISHED_SHEET_ID_3) 223 | // await sheetsToCSV(SHEETS_V4, PUBLISHED_SHEET_ID_4) 224 | // await sheetsToCSV(SHEETS_V5, PUBLISHED_SHEET_ID_5) 225 | // await sheetsToCSV(SHEETS_V6, PUBLISHED_SHEET_ID_6) 226 | // await sheetsToCSV(SHEETS_V7, PUBLISHED_SHEET_ID_7) 227 | // await sheetsToCSV(SHEETS_V8, PUBLISHED_SHEET_ID_8) 228 | // await sheetsToCSV(SHEETS_V9, PUBLISHED_SHEET_ID_9) 229 | // await sheetsToCSV(SHEETS_V10, PUBLISHED_SHEET_ID_10) 230 | // await sheetsToCSV(SHEETS_V11, PUBLISHED_SHEET_ID_11) 231 | // await sheetsToCSV(SHEETS_V12, PUBLISHED_SHEET_ID_12) 232 | // await sheetsToCSV(SHEETS_V13, PUBLISHED_SHEET_ID_13) 233 | // await sheetsToCSV(SHEETS_V14, PUBLISHED_SHEET_ID_14) 234 | // await sheetsToCSV(SHEETS_V15, PUBLISHED_SHEET_ID_15) 235 | // await sheetsToCSV(SHEETS_V16, 
PUBLISHED_SHEET_ID_16) 236 | // await sheetsToCSV(SHEETS_V17, PUBLISHED_SHEET_ID_17) 237 | // await sheetsToCSV(SHEETS_V18, PUBLISHED_SHEET_ID_18) 238 | // await sheetsToCSV(SHEETS_V19, PUBLISHED_SHEET_ID_19) 239 | // await sheetsToCSV(SHEETS_V20, PUBLISHED_SHEET_ID_20) 240 | // await sheetsToCSV(SHEETS_V21, PUBLISHED_SHEET_ID_21) 241 | // await sheetsToCSV(SHEETS_V22, PUBLISHED_SHEET_ID_22) 242 | // await sheetsToCSV(SHEETS_V23, PUBLISHED_SHEET_ID_23) 243 | // await sheetsToCSV(SHEETS_V24, PUBLISHED_SHEET_ID_24) 244 | // await sheetsToCSV(SHEETS_V25, PUBLISHED_SHEET_ID_25) 245 | // await sheetsToCSV(SHEETS_V26, PUBLISHED_SHEET_ID_26) 246 | // await sheetsToCSV(SHEETS_V27, PUBLISHED_SHEET_ID_27) 247 | // await sheetsToCSV(SHEETS_V28, PUBLISHED_SHEET_ID_28) 248 | // await sheetsToCSV(SHEETS_V29, PUBLISHED_SHEET_ID_29) 249 | // await sheetsToCSV(SHEETS_V30, PUBLISHED_SHEET_ID_30) 250 | await sheetsToCSV(SHEETS_V31, PUBLISHED_SHEET_ID_31) 251 | await sheetsToCSV(SHEETS_V32, PUBLISHED_SHEET_ID_32) 252 | })() 253 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # COVID19-India API 2 | 3 | ## Announcement 4 | **We have stopped capturing testing data at a district level. Please check the status of the API endpoints below.** 5 | 6 | 1. As of 13th August, this repository and api.covid19india.org have been deprecated. We redirect api.covid19india.org to data.covid19india.org. Please refer to [this](https://github.com/covid19india/data) repository for the latest endpoint. 7 | 2. As of 13th August, all json endpoints except v4 endpoints have been deprecated. Please use the csv endpoints or refer to the v4 endpoints. 8 | 9 | 10 | ### Files available 11 | 12 | - Aggregated sheets provide aggregated data at the district/state levels in CSV format. 13 | - V4 json endpoints. These are the json APIs used by the website to show all the statistics on the site. They can be used by developers and analysts who are comfortable with json parsing (recommended approach). All our v4 endpoints are actively developed and in use, since they serve the frontend view. [Documentation for the same](https://api.covid19india.org/documentation). 14 | - Latest data from the google sheet (10-20 minutes delayed) is available through the `latest` end-point. These are present under the `raw files` section below. (Not recommended, since the number of files is huge and there is no additional information present in these as compared to the above-mentioned endpoints.) 15 | 16 | ### V4 JSON endpoints 17 | 18 | | Status | Link to API | Description | 19 | | ------------- | -------------------------------------------------------- | ---------------------- | 20 | | :green_heart: | | Daily numbers across C,R,D and Tested per state (historical data). | 21 | | :green_heart: | | Current day numbers across districts and states. | 22 | | :green_heart: | | Per day numbers across districts and states - consider using timeseries in place of this. This is a huge file and is a mix of timeseries and data.min.json | 23 | 24 | **Note**: Please consider using the above endpoints for all your data needs. All the data we show on the website is fuelled by the above endpoints.
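As a quick sketch of consuming a v4 endpoint from Python: the exact path below is an assumption (a `v4/min/timeseries.min.json` file on the data.covid19india.org host mentioned above), so verify it against the links in the table before relying on it.

```python
import json
import urllib.request

# Assumed endpoint path; check the V4 table above for the live links.
URL = 'https://data.covid19india.org/v4/min/timeseries.min.json'

with urllib.request.urlopen(URL) as resp:
    timeseries = json.load(resp)

# The payload is keyed by state code; 'TT' is the India-wide total.
print(sorted(timeseries.keys())[:5])
```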
25 | 26 | #### Aggregated Sheets (CSV) 27 | 28 | | Status | Sheet Name | Link to CSV | Description | 29 | | ------------- | ----------------------------- | --------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | 30 | | :green_heart: | case_time_series | | India level timeseries for Confirmed, Recovered and Deceased cases | 31 | | :green_heart: | states | | Statewise timeseries of Confirmed, Recovered and Deceased numbers. | 32 | | :green_heart: | districts | | Districtwise timeseries of Confirmed, Recovered and Deceased numbers. | 33 | | :green_heart: | state_wise_daily | | Statewise per day delta of Confirmed, Recovered and Deceased numbers. | 34 | | :green_heart: | state_wise | | Statewise cumulative numbers till date. | 35 | | :green_heart: | district_wise | | Districtwise cumulative numbers till date. | 36 | | :green_heart: | statewise_tested_numbers_data | | Number of tests conducted in each state, plus ventilators and hospital bed occupancy reported in state bulletins | 37 | | :green_heart: | tested_numbers_icmr_data | | Number of tests reported by ICMR | 38 | | :green_heart: | icmr_labs_statewise | | Number of Labs in each state as per ICMR | 39 | | :green_heart: | sources_list | | List of sources that we are using. | 40 | | :green_heart: | rtpcr_samples_collected | | Number of RTPCR samples collected statewise in ICMR Application | 41 | | :green_heart: | vaccine_doses_administered_statewise | | Number of vaccine doses administered statewise - Collected from MoHFW daily bulletin | 42 | | :green_heart: | cowin_vaccine_data_statewise | | Key data points from CoWin database at a state level | 43 | | :green_heart: | cowin_vaccine_data_districtwise | | Key data points from CoWin database at a district level | 44 | 45 | Latest data from the google sheet (10-20 minutes delayed) is available through the `latest` end-point. 46 | These endpoints should be avoided unless none of the above endpoints work for you.
47 | 48 | #### Raw Data 49 | 50 | | Status | Sheet Name | Link to CSV | Description | 51 | | ------------- | ---------- | -------------------------------------------------------- | ---------------------- | 52 | | :green_heart: | raw_data1 | | Till Apr 19th | 53 | | :green_heart: | raw_data2 | | Apr 20th to Apr 26th | 54 | | :green_heart: | raw_data3 | | Apr 27th to May 9th | 55 | | :green_heart: | raw_data4 | | May 10th to May 23rd | 56 | | :green_heart: | raw_data5 | | May 24th to Jun 4th | 57 | | :green_heart: | raw_data6 | | Jun 05th to Jun 19th | 58 | | :green_heart: | raw_data7 | | Jun 20th to Jun 30th | 59 | | :green_heart: | raw_data8 | | Jul 01st to Jul 7th | 60 | | :green_heart: | raw_data9 | | Jul 08th to Jul 13th | 61 | | :green_heart: | raw_data10 | | Jul 14th to Jul 17th | 62 | | :green_heart: | raw_data11 | | Jul 18th to Jul 22nd | 63 | | :green_heart: | raw_data12 | | Jul 23rd to Aug 06th | 64 | | :green_heart: | raw_data13 | | Aug 07th to Aug 21st | 65 | | :green_heart: | raw_data14 | | Aug 22nd to Sep 05th | 66 | | :green_heart: | raw_data15 | | Sep 06th to Sep 21st | 67 | | :green_heart: | raw_data16 | | Sep 22nd to Oct 08th | 68 | | :green_heart: | raw_data17 | | Oct 09th to Oct 26th | 69 | | :green_heart: | raw_data18 | | Oct 27th to Nov 12th | 70 | | :green_heart: | raw_data19 | | Nov 13th to Nov 30th | 71 | | :green_heart: | raw_data20 | | Dec 01st to Dec 19th | 72 | | :green_heart: | raw_data21 | | Dec 20th to Jan 08th | 73 | | :green_heart: | raw_data22 | | Jan 09th to Jan 31st | 74 | | :green_heart: | raw_data23 | | Feb 01st to Feb 27th | 75 | | :green_heart: | raw_data24 | | Feb 28th to Mar 31st | 76 | | :green_heart: | raw_data25 | | Apr 01st to Apr 20th | 77 | | :green_heart: | raw_data26 | | Apr 21st to May 04th | 78 | | :green_heart: | raw_data27 | | May 05th to May 17th | 79 | | :green_heart: | raw_data28 | | May 18th to Jun 02nd | 80 | | :green_heart: | raw_data29 | | Jun 03rd to Jun 19th | 81 | | :green_heart: | raw_data30 | | Jun 20th to Jul 06th | 82 | | :green_heart: | raw_data31 | | Jul 07th to Jul 27th | 83 | | :green_heart: | raw_data32 | | Jul 28th onwards | 84 | 85 | #### Note 86 | 87 | - Avoid using raw sheets. The only reason to use raw sheets would be to refer to demographics (rarely available) or to sources of numbers 88 | - Always try to use the aggregated numbers above as they have been treated for discrepancies 89 | 90 | #### Contributing 91 | 92 | - If you notice issues, have questions or want to suggest enhancements, please raise an issue in the repo. 93 | 94 | #### Quick Links 95 | 96 | A more detailed note on the columns present in the data may be found in the json documentation 97 | 98 | - [Documentation](https://api.covid19india.org/documentation) 99 | 100 | 101 |
102 | -------------------------------------------------------------------------------- /src/build_raw_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd # pylint: disable=import-error 2 | import re 3 | from pathlib import Path 4 | import logging 5 | import sys 6 | import os 7 | from urllib.error import HTTPError 8 | # Set logging level 9 | logging.basicConfig(stream=sys.stdout, 10 | format="%(message)s", 11 | level=logging.INFO) 12 | 13 | def fetch_raw_data_from_api(): 14 | ''' 15 | Read all raw data and death and recovery files 16 | Return the latest version number of raw data 17 | ''' 18 | i = 1 19 | raw_d = [] 20 | while True: 21 | try: 22 | url = f"https://api.covid19india.org/csv/latest/raw_data{i}.csv" 23 | df = pd.read_csv(url) 24 | df.to_csv(f'./tmp/csv/latest/raw_data{i}.csv',index=False) 25 | raw_d.append(df) 26 | logging.info(f"Fetched raw_data{i} ") 27 | i = i+1 28 | except HTTPError: 29 | current_ver = i-1 30 | break 31 | 32 | death_rec = [] 33 | logging.info("Fetching deaths_and_recoveries") 34 | url = "https://api.covid19india.org/csv/latest/death_and_recovered" 35 | df = pd.read_csv(f"{url}1.csv") 36 | death_rec.append(df) 37 | df.to_csv('./tmp/csv/latest/death_and_recovered1.csv',index=False) 38 | df = pd.read_csv(f"{url}2.csv") 39 | death_rec.append(df) 40 | df.to_csv('./tmp/csv/latest/death_and_recovered2.csv',index=False) 41 | 42 | return raw_d,death_rec,current_ver 43 | 44 | def fetch_raw_data(): 45 | ''' 46 | Read all raw data and death and recovery files 47 | Return the latest number of raw data files 48 | ''' 49 | raw_d = [] 50 | death_rec = [] 51 | fpath = Path('tmp/csv/latest') 52 | 53 | i = 1 54 | while True: 55 | try: 56 | df = pd.read_csv(fpath / f"raw_data{i}.csv") 57 | raw_d.append(df) 58 | logging.info(f"Fetched raw_data{i} ") 59 | i = i+1 60 | except FileNotFoundError: 61 | current_ver = i-1 62 | break 63 | 64 | i = 1 65 | while True: 66 | try: 67 | df = pd.read_csv(fpath / f"death_and_recovered{i}.csv") 68 | death_rec.append(df) 69 | logging.info(f"Fetched death_and_recovered{i} ") 70 | i = i+1 71 | except FileNotFoundError: 72 | break 73 | 74 | logging.info("Data read complete") 75 | 76 | return raw_d,death_rec,current_ver 77 | 78 | def fix_rawdata1and2(raw,rec,col_list,sheet_version): 79 | ''' 80 | Raw Data 1 and 2 had a different format 81 | Select necessary columns and change data types 82 | Add death and recovery data to raw_data 83 | ''' 84 | print(f"V{sheet_version} Shape \t: {raw.shape}") 85 | 86 | # Only choose the valid current statuses 87 | raw = raw[raw['Current Status'].isin( ['Hospitalized','Recovered','Deceased','Migrated','Migrated_Other'])].copy() 88 | 89 | # Prepare necessary columns 90 | raw['Num Cases'] = 1 91 | raw['Entry_ID'] = 0 92 | raw['Current Status'] = "Hospitalized" # raw_data1/2 rows are confirmations; outcomes come from the death_and_recovered sheets 93 | raw = raw.fillna('') 94 | raw = raw[col_list] 95 | 96 | # If Detected State is not available, entry is invalid 97 | raw = raw[raw['Detected State'] != ''].copy() 98 | 99 | # Convert Date Announced string to datetime 100 | raw['Date Announced'] = pd.to_datetime(raw['Date Announced'],format='%d/%m/%Y') 101 | 102 | 103 | # Add Sheet Version Column 104 | raw['Sheet_Version'] = sheet_version 105 | 106 | # Only choose the valid current statuses 107 | rec = rec[rec['Patient_Status'].isin(['Hospitalized','Recovered','Deceased','Migrated','Migrated_Other'])].copy() 108 | 109 | # Prepare necessary columns 110 | rec['Num Cases'] = 1 111 | rec['Entry_ID'] = 0 112 | rec['Current Status'] = rec['Patient_Status'] 113 | 
113 |     rec['Date Announced'] = rec['Date']
114 |     rec['State code'] = rec['Statecode']
115 |     rec['Detected City'] = rec['City']
116 |     rec['Status Change Date'] = ''
117 |     rec['Contracted from which Patient (Suspected)'] = ''
118 |     rec['Detected State'] = rec['State']
119 |     rec['Detected District'] = rec['District']
120 |     rec['Patient Number'] = rec['Patient_Number (Could be mapped later)']
121 |     rec['State Patient Number'] = ''
122 |     rec['Type of transmission'] = ''
123 | 
124 |     rec = rec.fillna('')
125 |     rec = rec[col_list]
126 | 
127 |     # If Detected State is not available, entry is invalid
128 |     rec = rec[rec['Detected State'] != ''].copy()
129 | 
130 |     # Convert Date column from string to date
131 |     rec['Date Announced'] = pd.to_datetime(rec['Date Announced'],format='%d/%m/%Y')
132 | 
133 |     # Add sheet version
134 |     rec['Sheet_Version'] = sheet_version
135 | 
136 |     # Add deaths and recoveries to raw data
137 |     raw = pd.concat([raw,rec],sort=True)
138 | 
139 |     return raw
140 | 
141 | 
142 | def merge_alldata(current_ver):
143 |     '''
144 |     Merge all sheets together (uses the globals raw_d and death_rec read in __main__)
145 |     '''
146 |     col_list = ['Entry_ID', 'State Patient Number', 'Date Announced', 'Age Bracket',
147 |                 'Gender', 'Detected City', 'Detected District', 'Detected State',
148 |                 'State code', 'Num Cases', 'Current Status',
149 |                 'Contracted from which Patient (Suspected)', 'Notes', 'Source_1',
150 |                 'Source_2', 'Source_3', 'Nationality', 'Type of transmission',
151 |                 'Status Change Date', 'Patient Number']
152 | 
153 |     allraw = fix_rawdata1and2(raw_d[0],death_rec[0],col_list,sheet_version=1)
154 |     tmp = fix_rawdata1and2(raw_d[1],death_rec[1],col_list,sheet_version=2)
155 |     allraw = pd.concat([allraw,tmp],sort=True)
156 | 
157 |     for i in range(2,current_ver):
158 |         tmp = raw_d[i]
159 |         # Print the shape of the sheet actually being processed
160 |         print(f"V{i+1} Shape \t: {tmp.shape}")
161 |         tmp = tmp.fillna('')
162 | 
163 |         # Remove rows that don't have
164 |         # any State mentioned.
165 |         # This handles the dangling tail
166 |         # of the most recent sheet
167 |         tmp = tmp[tmp['Detected State'] != ''].copy()
168 | 
169 |         # Select only necessary columns
170 |         tmp = tmp[col_list]
171 |         # Convert date string to datetime
172 |         tmp['Date Announced'] = pd.to_datetime(tmp['Date Announced'],format='%d/%m/%Y')
173 |         # Add sheet version
174 |         tmp['Sheet_Version'] = i+1
175 | 
176 |         allraw = pd.concat([allraw,tmp],sort=True)
177 | 
178 |     # Try to fix age to float
179 |     allraw['Age Bracket'] = allraw['Age Bracket'].map(fix_age)
180 |     # Try to fix gender column
181 |     allraw['Gender'] = allraw['Gender'].map(fix_gender)
182 | 
183 |     print(f"Raw Data Shape \t: {allraw.shape}")
184 |     return allraw
185 | 
186 | def fix_age(age):
187 |     '''
188 |     Age entries are sometimes entered in months or days.
189 |     Change them to a fraction of a year
190 |     '''
191 |     rgx_month = re.compile(r"([0-9]+)\s*month")
192 |     rgx_day = re.compile(r"([0-9]+)\s*day")
193 |     res_month = rgx_month.search(str(age).lower())
194 |     res_day = rgx_day.search(str(age).lower())
195 |     if res_month:
196 |         age_corr = float(res_month.group(1))/12
197 |         return round(age_corr,2)
198 |     elif res_day:
199 |         age_corr = float(res_day.group(1))/365.25
200 |         return round(age_corr,2)
201 |     try:
202 |         return float(age)
203 |     except ValueError:
204 |         return age  # leave blank or range entries (e.g. '28-35') untouched
205 | 
206 | def fix_gender(g):
207 |     '''
208 |     Fix any invalid entries in gender column
209 |     '''
210 |     # No commas inside the classes: '[w,W]' would also turn every comma into 'F'
211 |     rgx_F = re.compile(r"[wW]|[fF]emale")
212 |     rgx_M = re.compile(r"[mM]ale")
213 |     g = str(g)
214 |     g = re.sub(rgx_F,"F",g)
215 |     g = re.sub(rgx_M,"M",g)
216 |     return g
217 | 
218 | def compare_with_gospel():
219 |     '''
220 |     Till April 26th the districtwise sheet was managed separately, i.e. Raw
221 |     Data v1/v2 do not truly represent the district values for that period.
222 |     This function compares Raw Data entries with the gospel, ignoring blank districts.
223 |     '''
224 |     # Read merged data
225 |     df = pd.read_csv('./tmp/csv/latest/raw_data.csv',low_memory=False)
226 | 
227 |     df['Date Announced'] = pd.to_datetime(df['Date Announced'])
228 |     df = df[df['Date Announced'] <= '2020-04-26']
229 |     df['District_Key'] = df['State code'] + "_" + df['Detected District']
230 |     df['Num Cases'] = pd.to_numeric(df['Num Cases'], errors='coerce')
231 | 
232 |     dis_counts = pd.pivot_table(df,values = 'Num Cases',
233 |                                 index = 'District_Key',
234 |                                 columns='Current Status',
235 |                                 aggfunc = sum).reset_index()
236 | 
237 |     dis_counts.rename(columns={'Hospitalized':'Confirmed'},inplace=True)
238 | 
239 |     # Read gospel
240 |     url = "https://raw.githubusercontent.com/covid19india/api/gh-pages/csv/latest/districts_26apr_gospel.csv"
241 |     gospel = pd.read_csv(url)
242 | 
243 |     compare = pd.merge(gospel,dis_counts,on='District_Key', suffixes=("_gospel","_v1v2"))
244 | 
245 |     compare.fillna(0,inplace=True)
246 | 
247 |     compare['Conf_Diff'] = compare['Confirmed_gospel'] - compare['Confirmed_v1v2']
248 |     compare['Reco_Diff'] = compare['Recovered_gospel'] - compare['Recovered_v1v2']
249 |     compare['Dece_Diff'] = compare['Deceased_gospel'] - compare['Deceased_v1v2']
250 | 
251 |     compare.to_csv("./tmp/csv/compare_gospel_v1v2.csv",index=False)
252 |     logging.info('Comparison file saved as ./tmp/csv/compare_gospel_v1v2.csv')
253 | 
254 |     return compare
255 | 
256 | if __name__ == "__main__":
257 |     logging.info('''----------------------------------------------------------------------''')
258 |     logging.info('''Build one true raw data''')
259 |     logging.info('''----------------------------------------------------------------------''')
260 | 
261 |     os.makedirs('./tmp/csv/latest/',exist_ok=True)
262 | 
263 |     try:
264 |         # To reuse local copies under tmp/csv/latest, call fetch_raw_data() instead:
265 |         # raw_d,death_rec,current_ver = fetch_raw_data()
266 |         raw_d,death_rec,current_ver = fetch_raw_data_from_api()
267 |     except Exception:
268 |         logging.error("Error while reading the files")
269 |         raise
270 |     logging.info('''----------------------------------------------------------------------''')
271 | 
272 |     allraw = merge_alldata(current_ver)
273 |     allraw.to_csv('./tmp/csv/latest/raw_data.csv',index=False)
274 |     logging.info('''----------------------------------------------------------------------''')
275 |     logging.info('''Raw Data saved''')
276 |     logging.info('''----------------------------------------------------------------------''')
277 |     logging.info('''Comparing
with Gospel''') 278 | _ = compare_with_gospel() 279 | logging.info('''----------------------------------------------------------------------''') 280 | -------------------------------------------------------------------------------- /src/obsolete/geocoder.py_bak: -------------------------------------------------------------------------------- 1 | from mapbox import Geocoder 2 | 3 | import json 4 | import urllib 5 | import time 6 | from random import gauss 7 | import logging as logger 8 | logger.basicConfig(level=logger.ERROR) 9 | 10 | import os 11 | import sys 12 | 13 | def fetch_data( 14 | path="https://api.covid19india.org/resources/resources.json", geojson=False 15 | ): 16 | request = urllib.request.urlopen(path) 17 | data = json.load(request) 18 | if geojson: 19 | return data 20 | 21 | inputDict = data["resources"] 22 | return inputDict 23 | 24 | 25 | def save_data(save_this, fname="output"): 26 | with open("./tmp/resources/" + fname + ".json", "w") as json_file: 27 | json.dump(save_this, json_file, indent=4) 28 | 29 | 30 | class EssentialsConverter: 31 | def __init__(self): 32 | # Public API. So commiting with code. No big secret here. 33 | 34 | # self.access_token = "pk.eyJ1IjoiYXNobWFwNGdkIiwiYSI6ImNrOXBjb2k2dDA5YW4zb24xb3A2cWs5YXYifQ.3qtCEWPKAOEftYEEUDfDSQ" 35 | self.access_token = "pk.eyJ1IjoiYXNocm9uYXZvbmEiLCJhIjoiY2thYjRtY2hmMDkyeDJ0bzg0cHF5dTh5diJ9.10oEwG3nFpYrhYy-LufSuA" 36 | self.coder = Geocoder(access_token=self.access_token) 37 | self.cityDict = {} 38 | self.cityList = [] 39 | self.processedBatch = [] 40 | self.failed = [] 41 | self.failed_ids = [] 42 | self.failed_cities = [] 43 | self.gaussian = [] 44 | self._api = 0 45 | 46 | @property 47 | def request_ctr(self): 48 | self._api += 1 49 | 50 | @property 51 | def rate_limit_exceeded(self): 52 | return self._api and not self._api % 600 53 | 54 | def populate(self, dir="./tmp/resources/", fromFile=False): 55 | if fromFile: 56 | try: 57 | with open(dir + "cityData.json") as c_list: 58 | data = json.load(c_list) 59 | with open(dir + "debug.json") as d_file: 60 | debug = json.load(d_file) 61 | except FileNotFoundError: 62 | logger.error('Wanted to access cityData and debug, but not found') 63 | pass 64 | 65 | self.cityDict = data["cityboundaries"] 66 | self.cityList = data["cities"] 67 | 68 | self.gaussian = debug["gaussian"] 69 | self.failed_cities = debug["failed_cities"] 70 | self.failed_ids = debug["failed_ids"] 71 | self.failed = debug["failed_entries"] 72 | else: # read in city list and city dicts from other sources like the google sheet 73 | logger.warning('CityData file not used') 74 | pass 75 | 76 | def generate_geojson(self, oldData=None): 77 | update = [] 78 | if oldData: 79 | update = oldData["features"] 80 | update += self.processedBatch 81 | 82 | geojson = { 83 | "type": "FeatureCollection", 84 | "lastupdated": time.ctime(), 85 | "features": update, 86 | } 87 | 88 | return geojson 89 | def check_city(self, entry): 90 | city = " ".join((entry["city"], entry["state"])) 91 | 92 | if city not in self.cityList: 93 | self.make_boundaries(city) 94 | self.cityList.append(city) 95 | 96 | def make_boundaries(self, city): 97 | lvl1 = ["district"] 98 | lvl2 = ["place"] 99 | lvl3 = ["place", "locality", "neighborhood"] 100 | 101 | self.request_ctr 102 | response = self.coder.forward(city, types=lvl1, country=["in"], limit=1) 103 | 104 | if not response.json()["features"]: 105 | self.request_ctr 106 | response = self.coder.forward(city, types=lvl2, country=["in"], limit=1) 107 | 108 | # if not response.json()["features"]: 109 
| # self.request_ctr 110 | # response = self.coder.forward(city, types=lvl2, country=["in"], limit=1) 111 | 112 | if not response.json()["features"]: 113 | self.request_ctr 114 | response = self.coder.forward( 115 | city, types=lvl3, country=["in"], limit=1 116 | ) 117 | 118 | if not response.json()["features"]: 119 | self.request_ctr 120 | response = self.coder.forward(city, country=["in"], limit=1) 121 | 122 | if not response.json()["features"]: 123 | self.failed_cities.append(city) 124 | return 125 | 126 | feat = response.json()["features"][0] 127 | city_center = feat["center"] 128 | 129 | if "bbox" in feat.keys(): 130 | city_bbox = feat["bbox"] 131 | else: 132 | city_bbox = [] 133 | 134 | self.cityDict[city] = {"bbox": city_bbox, "center": city_center} 135 | 136 | logger.info(f"Boundaries for {city} has been added successfully") 137 | 138 | @staticmethod 139 | def get_icon(category): 140 | if category == "Accommodation and Shelter Homes": 141 | return "homes" 142 | elif category == "Ambulance": 143 | return "ambulance" 144 | elif category == "Community Kitchen": 145 | return "kitchen" 146 | elif category == "CoVID-19 Testing Lab": 147 | return "lab" 148 | elif category == "Delivery [Vegetables, Fruits, Groceries, Medicines, etc.]": 149 | return "delivery" 150 | elif category == "Fire Brigade": 151 | return "fire" 152 | elif category == "Free Food": 153 | return "food" 154 | elif category == "Fundraisers": 155 | return "fund" 156 | elif category == "Government Helpline": 157 | return "helpline" 158 | elif category == "Hospitals and Centers": 159 | return "hospital" 160 | elif category == "Mental well being and Emotional Support": 161 | return "wellbeing" 162 | elif category == "Police": 163 | return "police" 164 | elif category == "Senior Citizen Support": 165 | return "seniors" 166 | elif category == "Transportation": 167 | return "transport" 168 | elif category == "Quarantine Facility": 169 | return "quarantine" 170 | elif category == "Other": 171 | return "other" 172 | 173 | else: 174 | return "unknown" 175 | 176 | @staticmethod 177 | def scrape_url(url, start=33, end=None): 178 | reg = url[start:end].split(",") 179 | 180 | geom = {"type": "Point", "coordinates": [reg[1], reg[0]]} 181 | return geom 182 | 183 | def make_feature(self, entry): 184 | url_format = "http://www.google.com/maps/place/" 185 | 186 | # Parse entry data 187 | i = entry["recordid"] 188 | name = entry["nameoftheorganisation"] 189 | desc = entry["descriptionandorserviceprovided"] 190 | category = entry["category"] 191 | state = entry["state"] 192 | phone = entry["phonenumber"] 193 | contact = entry["contact"] 194 | 195 | # Declare new props 196 | q_name = "" 197 | q_addr = "" 198 | geom = {} 199 | 200 | # Set flags 201 | icon = self.get_icon(entry["category"]) 202 | isHealthcare = 0 203 | 204 | if category == "CoVID-19 Testing Lab" or category == "Hospitals and Centers": 205 | isHealthcare = 1 206 | 207 | city = " ".join((entry["city"], entry["state"])) 208 | query = ", ".join((entry["nameoftheorganisation"], entry["city"])) 209 | 210 | maps = False 211 | if url_format in contact: 212 | maps = True 213 | 214 | 215 | if "PAN" in city: 216 | if "India" in city: 217 | query = "India" 218 | state = "PAN India" 219 | else: 220 | query = entry["city"] 221 | state = " ".join(["PAN", state]) 222 | logger.info("Pan entry saved as ", state) 223 | 224 | 225 | # Skipped entries 226 | 227 | if city not in self.cityDict: 228 | self.failed.append(entry) 229 | self.failed_ids.append(i) 230 | return 231 | 232 | c_bbox = 
self.cityDict[city]["bbox"]
233 |         c_center = self.cityDict[city]["center"]
234 | 
235 | 
236 |         if not maps:
237 |             if c_bbox != []:
238 |                 self.request_ctr
239 |                 resp = self.coder.forward(query, country=["in"], bbox=c_bbox, limit=1)
240 |             else:
241 |                 self.request_ctr
242 |                 resp = self.coder.forward(query, country=["in"], limit=1)
243 | 
244 |             target = resp.geojson()
245 | 
246 |             # Get data
247 |             if target["features"]:  # condition -> non-empty response
248 |                 geom = target["features"][0]["geometry"]
249 |                 q_name = target["features"][0]["text"]
250 |                 q_addr = target["features"][0]["place_name"]
251 |             else:  # empty response -> fall back to a jittered point near the city centre
252 |                 self.gaussian.append(i)
253 |                 if c_bbox:
254 |                     sd = min(abs(c_bbox[0] - c_bbox[2]) / 8, abs(c_bbox[1] - c_bbox[3]) / 8)
255 |                 else:
256 |                     sd = c_center[0] * 0.0004
257 | 
258 |                 lng = gauss(c_center[0], sd)
259 |                 lat = gauss(c_center[1], sd)
260 | 
261 |                 geom = {"type": "Point", "coordinates": [lng, lat]}
262 |                 q_addr = city
263 |                 q_name = ""
264 | 
265 | 
266 |         if url_format in contact:
267 |             geom = self.scrape_url(contact)
268 |             self.request_ctr
269 |             reverse = self.coder.reverse(geom["coordinates"][0], geom["coordinates"][1])
270 |             target = reverse.geojson()
271 | 
272 |             if target["features"]:
273 |                 q_name = target["features"][0]["text"]
274 |                 q_addr = target["features"][0]["place_name"]
275 | 
276 |             if "PAN" in city:
277 |                 if "India" in city:
278 |                     q_addr = "India"
279 |                 else:
280 |                     q_addr = ", ".join([entry["state"], "India"])
281 | 
282 |         prop = {
283 |             "recordid": i,
284 |             "name": name,
285 |             "desc": desc,
286 |             "geoTag": q_name,
287 |             "addr": q_addr,
288 |             "state": state,
289 |             "phone": phone,
290 |             "contact": contact,
291 |             "priority": isHealthcare,
292 |             "icon": icon,
293 |         }
294 | 
295 |         self.processedBatch.append(
296 |             {"type": "Feature", "properties": prop, "geometry": geom}
297 |         )
298 | 
299 |     def process_entry(self, entry):
300 |         self.check_city(entry)  # Generates city data if it hasn't been done yet.
301 | self.make_feature(entry) 302 | logger.info(f'Processed #{entry["recordid"]}') 303 | 304 | 305 | def main(): 306 | # Get the latest resources.json 307 | # entries = fetch_data() 308 | 309 | converter = EssentialsConverter() 310 | 311 | print('Geocoding begins') 312 | 313 | """Read the recently fetched resources.json copied to tmp folder""" 314 | with open("./tmp/resources/resources.json") as f: 315 | entries = json.load(f)["resources"] 316 | 317 | """Read in old geojson via url or filepath""" 318 | # old_entries = fetch_data(path="https://raw.githubusercontent.com/aswaathb/covid19india-react/publish/newResources/geoResources.json", geojson=True) 319 | processed_i = [] # cache recordid's for previously processed features 320 | try: 321 | 322 | with open("./tmp/resources/geoResources.json") as geo: 323 | old_entries = json.load(geo) 324 | 325 | for feature in old_entries["features"]: 326 | processed_i.append(int(feature["properties"]["recordid"])) 327 | 328 | # Load saved city boundaries 329 | converter.populate(fromFile=True) 330 | 331 | for idx, entry in enumerate(entries): 332 | if int(entry["recordid"]) not in processed_i: 333 | # convert only the missing entries 334 | converter.process_entry(entry) 335 | 336 | if converter.rate_limit_exceeded: 337 | logger.info("API rate limit: Minute delay could be added") 338 | time.sleep(61) 339 | 340 | # Feed in processed_entries as oldData to append new batch to previously geocoded entries 341 | feature_collection = converter.generate_geojson( 342 | oldData=old_entries 343 | ) 344 | 345 | except FileNotFoundError: 346 | logger.warning("Prefetched file not found. All entries will be geocoded.") 347 | for idx, entry in enumerate(entries): 348 | converter.process_entry(entry) 349 | 350 | if converter.rate_limit_exceeded: 351 | logger.info("API rate limit: Minute delay could be added") 352 | time.sleep(61) 353 | 354 | # Feed in processed_entries as oldData to append new batch to previously geocoded entries 355 | feature_collection = converter.generate_geojson( 356 | oldData=None 357 | ) 358 | 359 | except Exception as e: 360 | logger.error('Something went wrong ',e) 361 | sys.exit("geoResources.json couldn't compare") 362 | 363 | debug = { 364 | "gaussian": sorted(list(set(converter.gaussian))), 365 | "failed_cities": sorted(list(set(converter.failed_cities))), 366 | "failed_ids": sorted(list(set(converter.failed_ids))), 367 | "failed_entries": converter.failed, 368 | } 369 | 370 | city_data = {"cities": converter.cityList, "cityboundaries": converter.cityDict} 371 | 372 | save_data(feature_collection, "geoResources") 373 | save_data(debug, "debug") 374 | save_data(city_data, "cityData") 375 | 376 | print(f'{len(converter.processedBatch)} records were processed.\n{converter._api} api calls were made') 377 | 378 | if __name__ == "__main__": 379 | main() 380 | -------------------------------------------------------------------------------- /LICENSE_DATA: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. 
Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. 
Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. 
Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 
216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 
296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. 
Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 396 | -------------------------------------------------------------------------------- /src/parser_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import logging 5 | import json 6 | import yaml 7 | from collections import defaultdict, OrderedDict 8 | from datetime import datetime, timedelta 9 | from pathlib import Path 10 | 11 | # Set logging level 12 | logging.basicConfig(handlers=[logging.NullHandler()], 13 | format='%(message)s', 14 | level=logging.INFO) 15 | 16 | # Current date in India 17 | INDIA_DATE = datetime.strftime( 18 | datetime.utcnow() + timedelta(hours=5, minutes=30), '%Y-%m-%d') 19 | 20 | INPUT_DIR = Path('tmp') 21 | # Contains state codes to be used as API keys 22 | META_DATA = INPUT_DIR / 'misc.json' 23 | # Contains list of geographical districts 24 | DISTRICT_LIST = INPUT_DIR / 'state_district_wise.json' 25 | # All raw_data's 26 | RAW_DATA = 'raw_data{n}.json' 27 | # Contains deaths and recoveries for entries in raw_data1 and raw_data2 28 | OUTCOME_DATA = 'deaths_recoveries{n}.json' 29 | # Contains district data on 26th April 30 | DISTRICT_DATA_GOSPEL = INPUT_DIR / 'csv' / 'latest' / 'districts_26apr_gospel.csv' 31 | GOSPEL_DATE = '2020-04-26' 32 | # India testing data 33 | ICMR_TEST_DATA = INPUT_DIR / 'data.json' 34 | # States testing data 35 | STATE_TEST_DATA = INPUT_DIR / 'state_test_data.json' 36 | # District testing data 37 | DISTRICT_TEST_DATA = INPUT_DIR / 'csv' / 'latest' / 'district_testing.csv' 38 | ## For adding metadata 39 | # For state notes and last updated 40 | STATE_WISE = INPUT_DIR / 'data.json' 41 | # For district notes 42 | DISTRICT_WISE = INPUT_DIR / 'state_district_wise.json' 43 | 44 | OUTPUT_DIR = Path('tmp', 'v3') 45 | OUTPUT_MIN_DIR = OUTPUT_DIR / 'min' 46 | OUTPUT_DATA_PREFIX = 'data' 47 | OUTPUT_TIMESERIES_FILENAME = 'timeseries' 48 | 49 | # Two digit state codes 50 | STATE_CODES = {} 51 | # State codes to state names map (capitalized appropriately) 52 | 
STATE_NAMES = {} 53 | # State/district populations 54 | STATE_POPULATIONS = {} 55 | DISTRICT_POPULATIONS = defaultdict(dict) 56 | # Code corresponding to MoHFW's 'Unassigned States' in sheet 57 | UNASSIGNED_STATE_CODE = 'UN' 58 | # Dict containing geographical districts 59 | DISTRICTS_DICT = defaultdict(dict) 60 | # District key to give to unkown district values in raw_data 61 | UNKNOWN_DISTRICT_KEY = 'Unknown' 62 | 63 | PRIMARY_STATISTICS = ['confirmed', 'deceased', 'recovered'] 64 | 65 | RAW_DATA_MAP = { 66 | 'hospitalized': 'confirmed', 67 | 'deceased': 'deceased', 68 | 'recovered': 'recovered', 69 | 'migrated_other': 'migrated', 70 | } 71 | 72 | # Log statements width 73 | PRINT_WIDTH = 70 74 | 75 | # Nested default dict of dict 76 | ddict = lambda: defaultdict(ddict) 77 | # Dictionaries which stored final parsed data 78 | data = ddict() 79 | timeseries = ddict() 80 | 81 | 82 | def parse_state_metadata(raw_data): 83 | for i, entry in enumerate(raw_data['state_meta_data']): 84 | # State name with sheet capitalization 85 | state_name = entry['stateut'].strip() 86 | # State code caps 87 | state_code = entry['abbreviation'].strip().upper() 88 | STATE_CODES[state_name.lower()] = state_code 89 | STATE_NAMES[state_code] = state_name 90 | # State population 91 | try: 92 | population = int(entry['population'].strip()) 93 | except ValueError: 94 | if entry['population']: 95 | logging.warning('[L{}] [Bad population: {}] {}'.format( 96 | i + 2, entry['population'], state_code)) 97 | continue 98 | STATE_POPULATIONS[state_code] = population 99 | 100 | 101 | def parse_district_list(raw_data): 102 | for i, entry in enumerate(raw_data.values()): 103 | state = entry['statecode'].strip().upper() 104 | if state not in STATE_CODES.values(): 105 | logging.warning('[L{}] Bad state: {}'.format(i + 2, entry['statecode'])) 106 | continue 107 | if 'districtData' not in entry: 108 | continue 109 | 110 | for district in entry['districtData']: 111 | district = district.strip() 112 | DISTRICTS_DICT[state][district.lower()] = district 113 | 114 | 115 | def parse_district(district, state): 116 | district = district.strip() 117 | expected = True 118 | if not district or district.lower() == 'unknown': 119 | district = UNKNOWN_DISTRICT_KEY 120 | elif district.lower() in DISTRICTS_DICT[state]: 121 | district = DISTRICTS_DICT[state][district.lower()] 122 | else: 123 | expected = False 124 | return district, expected 125 | 126 | 127 | def parse_district_metadata(raw_data): 128 | for i, entry in enumerate(raw_data['district_meta_data']): 129 | # State code 130 | state = entry['statecode'].strip().upper() 131 | if state not in STATE_CODES.values(): 132 | logging.warning('[L{}] Bad state: {}'.format(i + 2, state)) 133 | continue 134 | # District name with sheet capitalization 135 | district, expected = parse_district(entry['district'], state) 136 | if not expected: 137 | logging.warning('[L{}] [{}] Unexpected district: {}'.format( 138 | i + 2, state, district)) 139 | # District population 140 | try: 141 | population = int(entry['population'].strip()) 142 | except ValueError: 143 | if entry['population']: 144 | logging.warning('[L{}] [Bad population: {}] {}: {}'.format( 145 | i + 2, entry['population'], state, district)) 146 | continue 147 | DISTRICT_POPULATIONS[state][district] = population 148 | 149 | 150 | def inc(ref, key, count): 151 | if not isinstance(ref[key], int): 152 | # Initialize with 0 153 | ref[key] = 0 154 | # Increment 155 | ref[key] += count 156 | 157 | 158 | def parse(raw_data, i): 159 | for j, entry in 
enumerate(raw_data['raw_data']): 160 | state_name = entry['detectedstate'].strip().lower() 161 | try: 162 | state = STATE_CODES[state_name] 163 | except KeyError: 164 | # Entries with empty state names are discarded 165 | if state_name: 166 | # Unrecognized state entries are discarded and logged 167 | logging.warning('[L{}] [{}] [Bad state: {}] {}'.format( 168 | j + 2, entry['dateannounced'], entry['detectedstate'], 169 | entry['numcases'])) 170 | continue 171 | 172 | try: 173 | fdate = datetime.strptime(entry['dateannounced'].strip(), '%d/%m/%Y') 174 | date = datetime.strftime(fdate, '%Y-%m-%d') 175 | if date > INDIA_DATE: 176 | # Entries from future dates will be ignored 177 | logging.warning('[L{}] [Future date: {}] {}: {} {}'.format( 178 | j + 2, entry['dateannounced'], entry['detectedstate'], 179 | entry['detecteddistrict'], entry['numcases'])) 180 | continue 181 | except ValueError: 182 | # Bad date 183 | logging.warning('[L{}] [Bad date: {}] {}: {} {}'.format( 184 | j + 2, entry['dateannounced'], entry['detectedstate'], 185 | entry['detecteddistrict'], entry['numcases'])) 186 | continue 187 | 188 | district, expected = parse_district(entry['detecteddistrict'], state) 189 | if not expected: 190 | # Print unexpected district names 191 | logging.warning('[L{}] [{}] [Unexpected district: {} ({})] {}'.format( 192 | j + 2, entry['dateannounced'], district, state, entry['numcases'])) 193 | 194 | try: 195 | count = int(entry['numcases'].strip()) 196 | except ValueError: 197 | logging.warning('[L{}] [{}] [Bad numcases: {}] {}: {}'.format( 198 | j + 2, entry['dateannounced'], entry['numcases'], state, district)) 199 | continue 200 | 201 | if count: 202 | try: 203 | # All rows in v1 and v2 are confirmed cases 204 | statistic = 'confirmed' if i < 3 else RAW_DATA_MAP[ 205 | entry['currentstatus'].strip().lower()] 206 | 207 | inc(data[date]['TT']['delta'], statistic, count) 208 | inc(data[date][state]['delta'], statistic, count) 209 | # Don't parse old district data since it's unreliable 210 | if i > 2 and date > GOSPEL_DATE and state != UNASSIGNED_STATE_CODE: 211 | inc(data[date][state]['districts'][district]['delta'], statistic, 212 | count) 213 | 214 | except KeyError: 215 | # Unrecognized status 216 | logging.warning('[L{}] [{}] [Bad currentstatus: {}] {}: {} {}'.format( 217 | j + 2, entry['dateannounced'], entry['currentstatus'], state, 218 | district, entry['numcases'])) 219 | 220 | 221 | def parse_outcome(outcome_data, i): 222 | for j, entry in enumerate(outcome_data['deaths_recoveries']): 223 | state_name = entry['state'].strip().lower() 224 | try: 225 | state = STATE_CODES[state_name] 226 | except KeyError: 227 | # Entries with empty state names are discarded 228 | if state_name: 229 | # Unrecognized state entries are discarded and logged 230 | logging.warning('[L{}] [{}] [Bad state: {}]'.format( 231 | j + 2, entry['date'], entry['state'])) 232 | continue 233 | 234 | try: 235 | fdate = datetime.strptime(entry['date'].strip(), '%d/%m/%Y') 236 | date = datetime.strftime(fdate, '%Y-%m-%d') 237 | if date > INDIA_DATE: 238 | # Entries from future dates will be ignored 239 | logging.warning('[L{}] [Future date: {}] {}'.format( 240 | j + 2, entry['date'], state)) 241 | continue 242 | except ValueError: 243 | # Bad date 244 | logging.warning('[L{}] [Bad date: {}] {}'.format(j + 2, entry['date'], 245 | state)) 246 | continue 247 | 248 | district, expected = parse_district(entry['district'], state) 249 | if not expected: 250 | # Print unexpected district names 251 | logging.warning('[L{}] [{}] 
[Unexpected district: {} ({})] {}'.format( 252 | j + 2, entry['date'], district, state, entry['numcases'])) 253 | 254 | try: 255 | statistic = RAW_DATA_MAP[entry['patientstatus'].strip().lower()] 256 | 257 | inc(data[date]['TT']['delta'], statistic, 1) 258 | inc(data[date][state]['delta'], statistic, 1) 259 | ## Don't parse old district data since it's unreliable 260 | # inc(data[date][state]['districts'][district]['delta'], statistic, 261 | # 1) 262 | except KeyError: 263 | # Unrecognized status 264 | logging.warning('[L{}] [{}] [Bad patientstatus: {}] {}: {}'.format( 265 | j + 2, entry['date'], entry['patientstatus'], state, district)) 266 | 267 | 268 | def parse_district_gospel(reader): 269 | for i, row in enumerate(reader): 270 | state = row['State_Code'].strip().upper() 271 | if state not in STATE_CODES.values(): 272 | logging.warning('[{}] Bad state: {}'.format(i, state)) 273 | continue 274 | district, expected = parse_district(row['District'], state) 275 | if not expected: 276 | # Print unexpected district names 277 | logging.warning('[{}] Unexpected district: {} {}'.format( 278 | i, state, district)) 279 | 280 | for statistic in PRIMARY_STATISTICS: 281 | count = int(row[statistic.capitalize()] or 0) 282 | if count: 283 | data[GOSPEL_DATE][state]['districts'][district]['total'][ 284 | statistic] = count 285 | 286 | 287 | def parse_icmr(icmr_data): 288 | for j, entry in enumerate(icmr_data['tested']): 289 | count_str = entry['totalsamplestested'].strip() 290 | try: 291 | fdate = datetime.strptime(entry['testedasof'].strip(), '%d/%m/%Y') 292 | date = datetime.strftime(fdate, '%Y-%m-%d') 293 | if date > INDIA_DATE: 294 | # Entries from future dates will be ignored 295 | if count_str: 296 | # Log non-zero entries 297 | logging.warning('[L{}] [Future date: {}]'.format( 298 | j + 2, entry['testedasof'])) 299 | continue 300 | except ValueError: 301 | # Bad timestamp 302 | logging.warning('[L{}] [Bad date: {}]'.format(j + 2, 303 | entry['testedasof'])) 304 | continue 305 | 306 | try: 307 | count = int(count_str) 308 | except ValueError: 309 | logging.warning('[L{}] [{}] [Bad totalsamplestested: {}]'.format( 310 | j + 2, entry['testedasof'], entry['totalsamplestested'])) 311 | continue 312 | 313 | if count: 314 | data[date]['TT']['total']['tested'] = count 315 | data[date]['TT']['meta']['tested']['source'] = entry['source'].strip() 316 | data[date]['TT']['meta']['tested']['last_updated'] = date 317 | 318 | 319 | def parse_state_test(raw_data): 320 | for j, entry in enumerate(raw_data['states_tested_data']): 321 | count_str = entry['totaltested'].strip() 322 | try: 323 | fdate = datetime.strptime(entry['updatedon'].strip(), '%d/%m/%Y') 324 | date = datetime.strftime(fdate, '%Y-%m-%d') 325 | if date > INDIA_DATE: 326 | # Entries from future dates will be ignored 327 | if count_str: 328 | # Log non-zero entries 329 | logging.warning('[L{}] [Future date: {}] {}'.format( 330 | j + 2, entry['updatedon'], entry['state'])) 331 | continue 332 | except ValueError: 333 | # Bad date 334 | logging.warning('[L{}] [Bad date: {}] {}'.format(j + 2, 335 | entry['updatedon'], 336 | entry['state'])) 337 | continue 338 | 339 | state_name = entry['state'].strip().lower() 340 | try: 341 | state = STATE_CODES[state_name] 342 | except KeyError: 343 | # Entries having unrecognized state names are discarded 344 | logging.warning('[L{}] [{}] [Bad state: {}]'.format( 345 | j + 2, entry['updatedon'], entry['state'])) 346 | continue 347 | 348 | try: 349 | count = int(count_str) 350 | except ValueError: 351 | 
logging.warning('[L{}] [{}] [Bad totaltested: {}] {}'.format( 352 | j + 2, entry['updatedon'], entry['totaltested'], entry['state'])) 353 | continue 354 | 355 | if count: 356 | data[date][state]['total']['tested'] = count 357 | data[date][state]['meta']['tested']['source'] = entry['source1'].strip() 358 | data[date][state]['meta']['tested']['last_updated'] = date 359 | 360 | 361 | def column_str(n): 362 | alpha = '' 363 | while n > 0: 364 | n, rem = divmod(n - 1, 26) 365 | alpha = chr(65 + rem) + alpha 366 | return alpha 367 | 368 | 369 | def parse_district_test(reader): 370 | # Header row 371 | header = next(reader) 372 | # Store formatted dates 373 | dates = ['' for _ in header] 374 | # Columns >= 6 contain dates 375 | for j in range(6, len(header), 5): 376 | try: 377 | fdate = datetime.strptime(header[j].strip(), '%d/%m/%Y') 378 | date = datetime.strftime(fdate, '%Y-%m-%d') 379 | if date <= INDIA_DATE: 380 | # Only keep entries from present or past dates 381 | dates[j] = date 382 | except ValueError: 383 | # Bad date 384 | logging.warning('[{}] Bad date: {}'.format(column_str(j), header[j])) 385 | # Skip second row 386 | next(reader) 387 | for i, row in enumerate(reader): 388 | # Column 3 contains state name 389 | state_name = row[3].strip().lower() 390 | try: 391 | state = STATE_CODES[state_name] 392 | except KeyError: 393 | # Entries having unrecognized state names are discarded 394 | logging.warning('[L{}] Bad state: {}'.format(i + 3, row[3])) 395 | continue 396 | 397 | # Column 4 contains district name 398 | district, expected = parse_district(row[4], state) 399 | if not expected: 400 | # Print unexpected district names 401 | logging.warning('[L{}] Unexpected district: {} {}'.format( 402 | i + 3, state, district)) 403 | 404 | # Testing data starts from column 6 405 | for j in range(6, len(row), 5): 406 | # Date header 407 | date = dates[j] 408 | if not date: 409 | # Skip future date 410 | continue 411 | # | Tested | Positive | Negative | Source1 | Source2 | 412 | try: 413 | count = int(row[j].strip()) 414 | except ValueError: 415 | if row[j]: 416 | logging.warning('[L{} {}] [{}: {}] Bad Tested: {}'.format( 417 | i + 3, column_str(j), state, district, row[j])) 418 | continue 419 | # Use Source1 key as source 420 | source = row[j + 3].strip() 421 | if count: 422 | data[date][state]['districts'][district]['total']['tested'] = count 423 | data[date][state]['districts'][district]['meta']['tested'][ 424 | 'source'] = source 425 | data[date][state]['districts'][district]['meta']['tested'][ 426 | 'last_updated'] = date 427 | 428 | 429 | def fill_tested(): 430 | dates = sorted(data) 431 | for i, date in enumerate(dates): 432 | curr_data = data[date] 433 | 434 | # Initialize today's delta with today's cumulative 435 | for state, state_data in curr_data.items(): 436 | if 'total' in state_data and 'tested' in state_data['total']: 437 | state_data['delta']['tested'] = state_data['total']['tested'] 438 | 439 | if 'districts' not in state_data: 440 | continue 441 | 442 | for district, district_data in state_data['districts'].items(): 443 | if 'total' in district_data and 'tested' in district_data['total']: 444 | district_data['delta']['tested'] = district_data['total']['tested'] 445 | 446 | if i > 0: 447 | prev_date = dates[i - 1] 448 | prev_data = data[prev_date] 449 | for state, state_data in prev_data.items(): 450 | if 'total' in state_data and 'tested' in state_data['total']: 451 | if 'tested' in curr_data[state]['total']: 452 | # Subtract previous cumulative to get delta 453 | 
curr_data[state]['delta']['tested'] -= state_data['total'][ 454 | 'tested'] 455 | else: 456 | # Take today's cumulative to be same as yesterday's 457 | # cumulative if today's cumulative is missing 458 | curr_data[state]['total']['tested'] = state_data['total']['tested'] 459 | curr_data[state]['meta']['tested']['source'] = state_data['meta'][ 460 | 'tested']['source'] 461 | curr_data[state]['meta']['tested']['last_updated'] = state_data[ 462 | 'meta']['tested']['last_updated'] 463 | 464 | if 'districts' not in state_data: 465 | continue 466 | 467 | for district, district_data in state_data['districts'].items(): 468 | if 'total' in district_data and 'tested' in district_data['total']: 469 | if 'tested' in curr_data[state]['districts'][district]['total']: 470 | # Subtract previous cumulative to get delta 471 | curr_data[state]['districts'][district]['delta'][ 472 | 'tested'] -= district_data['total']['tested'] 473 | else: 474 | # Take today's cumulative to be same as yesterday's 475 | # cumulative if today's cumulative is missing 476 | curr_data[state]['districts'][district]['total'][ 477 | 'tested'] = district_data['total']['tested'] 478 | curr_data[state]['districts'][district]['meta']['tested'][ 479 | 'source'] = district_data['meta']['tested']['source'] 480 | curr_data[state]['districts'][district]['meta']['tested'][ 481 | 'last_updated'] = district_data['meta']['tested'][ 482 | 'last_updated'] 483 | 484 | 485 | def accumulate(start_after_date='', end_date='3020-01-30'): 486 | dates = sorted(data) 487 | for i, date in enumerate(dates): 488 | if date <= start_after_date: 489 | continue 490 | elif date > end_date: 491 | break 492 | curr_data = data[date] 493 | 494 | if i > 0: 495 | # Initialize today's cumulative with previous available 496 | prev_date = dates[i - 1] 497 | prev_data = data[prev_date] 498 | for state, state_data in prev_data.items(): 499 | for statistic in RAW_DATA_MAP.values(): 500 | if statistic in state_data['total']: 501 | inc(curr_data[state]['total'], statistic, 502 | state_data['total'][statistic]) 503 | 504 | if 'districts' not in state_data or date <= GOSPEL_DATE: 505 | # Old district data is already accumulated 506 | continue 507 | 508 | for district, district_data in state_data['districts'].items(): 509 | for statistic in RAW_DATA_MAP.values(): 510 | if statistic in district_data['total']: 511 | inc(curr_data[state]['districts'][district]['total'], statistic, 512 | district_data['total'][statistic]) 513 | 514 | # Add today's dailys to today's cumulative 515 | for state, state_data in curr_data.items(): 516 | if 'delta' in state_data: 517 | for statistic in RAW_DATA_MAP.values(): 518 | if statistic in state_data['delta']: 519 | inc(state_data['total'], statistic, state_data['delta'][statistic]) 520 | 521 | if 'districts' not in state_data or date <= GOSPEL_DATE: 522 | # Old district data is already accumulated 523 | continue 524 | 525 | for district, district_data in state_data['districts'].items(): 526 | if 'delta' in district_data: 527 | for statistic in RAW_DATA_MAP.values(): 528 | if statistic in district_data['delta']: 529 | inc(district_data['total'], statistic, 530 | district_data['delta'][statistic]) 531 | 532 | 533 | def fill_gospel_unknown(): 534 | # Gospel doesn't contain unknowns 535 | # Fill them based on gospel date state counts 536 | curr_data = data[GOSPEL_DATE] 537 | for state, state_data in curr_data.items(): 538 | if 'districts' not in state_data or 'total' not in state_data: 539 | # State had no cases yet 540 | continue 541 | 542 | 


def fill_gospel_unknown():
  # Gospel doesn't contain unknowns
  # Fill them based on gospel date state counts
  curr_data = data[GOSPEL_DATE]
  for state, state_data in curr_data.items():
    if 'districts' not in state_data or 'total' not in state_data:
      # State had no cases yet
      continue

    sum_district_totals = defaultdict(lambda: 0)
    for district, district_data in state_data['districts'].items():
      if 'total' in district_data:
        for statistic, count in district_data['total'].items():
          sum_district_totals[statistic] += count

    for statistic in PRIMARY_STATISTICS:
      if statistic in state_data['total']:
        count = state_data['total'][statistic]
        if count != sum_district_totals[statistic]:
          # Counts don't match
          # We take Unknown district values = State - Sum(districts gospel)
          state_data['districts'][UNKNOWN_DISTRICT_KEY]['total'][
              statistic] = count - sum_district_totals[statistic]


def stripper(raw_data, dtype=ddict):
  # Remove empty entries
  new_data = dtype()
  for k, v in raw_data.items():
    if isinstance(v, dict):
      v = stripper(v, dtype)
    if v:
      new_data[k] = v
  return new_data


def add_populations():
  # Add population data for states/districts
  for curr_data in data.values():
    for state, state_data in curr_data.items():
      try:
        state_pop = STATE_POPULATIONS[state]
        state_data['meta']['population'] = state_pop
      except KeyError:
        pass

      if 'districts' not in state_data:
        continue

      for district, district_data in state_data['districts'].items():
        try:
          district_pop = DISTRICT_POPULATIONS[state][district]
          district_data['meta']['population'] = district_pop
        except KeyError:
          pass


def generate_timeseries(districts=False):
  for date in sorted(data):
    curr_data = data[date]

    for state, state_data in curr_data.items():
      for stype in ['total', 'delta']:
        if stype in state_data:
          for statistic, value in state_data[stype].items():
            timeseries[state][date][stype][statistic] = value

      if not districts or 'districts' not in state_data or date <= GOSPEL_DATE:
        # Total state has no district data
        # District timeseries starts only from 26th April
        continue

      for district, district_data in state_data['districts'].items():
        for stype in ['total', 'delta']:
          if stype in district_data:
            for statistic, value in district_data[stype].items():
              timeseries[state]['districts'][district][date][stype][
                  statistic] = value
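# Shape of the structure produced above (illustrative; 'KA' and the district
# name are just examples). The district branch, only taken when
# districts=True, nests one level deeper:
#   timeseries['KA']['2020-04-27']['total']['confirmed']
#   timeseries['KA']['districts']['Bengaluru Urban']['2020-04-27']['delta']['tested']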


def add_state_meta(raw_data):
  last_data = data[sorted(data)[-1]]
  for j, entry in enumerate(raw_data['statewise']):
    state = entry['statecode'].strip().upper()
    if state not in STATE_CODES.values():
      # Entries having unrecognized state codes are discarded
      logging.warning('[L{}] [{}] Bad state: {}'.format(
          j + 2, entry['lastupdatedtime'], entry['statecode']))
      continue

    try:
      fdate = datetime.strptime(entry['lastupdatedtime'].strip(),
                                '%d/%m/%Y %H:%M:%S')
    except ValueError:
      # Bad timestamp
      logging.warning('[L{}] [Bad timestamp: {}] {}'.format(
          j + 2, entry['lastupdatedtime'], state))
      continue

    last_data[state]['meta']['last_updated'] = fdate.isoformat() + '+05:30'
    if entry['statenotes']:
      last_data[state]['meta']['notes'] = entry['statenotes'].strip()


def add_district_meta(raw_data):
  last_data = data[sorted(data)[-1]]
  for j, entry in enumerate(raw_data.values()):
    state = entry['statecode'].strip().upper()
    if state not in STATE_CODES.values():
      # Entries having unrecognized state codes are discarded
      logging.warning('[L{}] Bad state: {}'.format(j + 2, entry['statecode']))
      continue

    for district, district_data in entry['districtData'].items():
      district, expected = parse_district(district, state)
      if not expected:
        logging.warning('[L{}] Unexpected district: {} {}'.format(
            j + 2, state, district))

      if district_data['notes']:
        last_data[state]['districts'][district]['meta'][
            'notes'] = district_data['notes'].strip()


def tally_statewise(raw_data):
  last_data = data[sorted(data)[-1]]
  # Check for extra entries
  logging.info('Checking for extra entries...')
  for state, state_data in last_data.items():
    found = False
    for entry in raw_data['statewise']:
      if state == entry['statecode'].strip().upper():
        found = True
        break
    if not found:
      logging.warning(yaml.dump(stripper({state: state_data}, dtype=dict)))
  logging.info('Done!')

  # Tally counts of entries present in statewise
  logging.info('Tallying final date counts...')
  for j, entry in enumerate(raw_data['statewise']):
    state = entry['statecode'].strip().upper()
    if state not in STATE_CODES.values():
      continue

    try:
      fdate = datetime.strptime(entry['lastupdatedtime'].strip(),
                                '%d/%m/%Y %H:%M:%S')
    except ValueError:
      # Bad timestamp
      logging.warning('[L{}] [Bad timestamp: {}] {}'.format(
          j + 2, entry['lastupdatedtime'], state))
      continue

    for statistic in PRIMARY_STATISTICS:
      try:
        values = {
            'total':
                int(entry[statistic if statistic != 'deceased' else 'deaths'].
                    strip()),
            'delta':
                int(entry['delta' + (statistic if statistic != 'deceased'
                                     else 'deaths')].strip())
        }
      except ValueError:
        logging.warning('[L{}] [{}] [Bad value for {}] {}'.format(
            j + 2, entry['lastupdatedtime'], statistic, state))
        continue

      for stype in ['total', 'delta']:
        if values[stype]:
          parsed_value = last_data[state][stype][statistic]
          if not isinstance(parsed_value, int):
            parsed_value = 0
          if values[stype] != parsed_value:
            # Print mismatch between statewise and parser
            logging.warning('{} {} {}: (sheet: {}, parser: {})'.format(
                state, statistic, stype, values[stype], parsed_value))


def tally_districtwise(raw_data):
  last_data = data[sorted(data)[-1]]
  # Check for extra entries
  logging.info('Checking for extra entries...')
  for state, state_data in last_data.items():
    if 'districts' not in state_data:
      continue
    state_name = STATE_NAMES[state]
    if state_name in raw_data:
      for district, district_data in state_data['districts'].items():
        found = False
        for entryDistrict in raw_data[state_name]['districtData'].keys():
          entryDistrict, _ = parse_district(entryDistrict, state)
          if district == entryDistrict:
            found = True
            break
        if not found:
          key = '{} ({})'.format(district, state)
          logging.warning(yaml.dump(stripper({key: district_data},
                                             dtype=dict)))
    else:
      logging.warning(yaml.dump(stripper({state: state_data}, dtype=dict)))
  logging.info('Done!')

  # Tally counts of entries present in districtwise
  logging.info('Tallying final date counts...')
  for j, entry in enumerate(raw_data.values()):
    state = entry['statecode'].strip().upper()
    if state not in STATE_CODES.values():
      continue

    for district, district_data in entry['districtData'].items():
      district, _ = parse_district(district, state)
      for statistic in PRIMARY_STATISTICS:
        values = {
            'total': district_data[statistic],
            'delta': district_data['delta'][statistic]
        }
        for stype in ['total', 'delta']:
          if values[stype]:
            parsed_value = last_data[state]['districts'][district][stype][
                statistic]
            if not isinstance(parsed_value, int):
              parsed_value = 0
            if values[stype] != parsed_value:
              # Print mismatch between districtwise and parser
              logging.warning('{} {} {} {}: (sheet: {}, parser: {})'.format(
                  state, district, statistic, stype, values[stype],
                  parsed_value))
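# Both tally functions only warn; they never mutate data. A statewise
# mismatch is reported in the form (values here are made up):
#   KA confirmed total: (sheet: 9150, parser: 9149)
# and the districtwise variant prefixes the district name as well.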


if __name__ == '__main__':
  logging.info('-' * PRINT_WIDTH)
  logging.info('{:{align}{width}}'.format('PARSER V3 START',
                                          align='^',
                                          width=PRINT_WIDTH))

  # Get possible state codes, populations
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing state metadata...')
  with open(META_DATA, 'r') as f:
    logging.info('File: {}'.format(META_DATA.name))
    raw_data = json.load(f)
    parse_state_metadata(raw_data)
  logging.info('Done!')

  # Get all actual district names
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing districts list...')
  with open(DISTRICT_LIST, 'r') as f:
    logging.info('File: {}'.format(DISTRICT_LIST.name))
    raw_data = json.load(f)
    parse_district_list(raw_data)
  logging.info('Done!')

  # Get district populations
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing district metadata...')
  with open(META_DATA, 'r') as f:
    logging.info('File: {}'.format(META_DATA.name))
    raw_data = json.load(f)
    parse_district_metadata(raw_data)
  logging.info('Done!')

  # Parse raw_data files
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing raw_data...')
  i = 1
  while True:
    f = INPUT_DIR / RAW_DATA.format(n=i)
    if not f.is_file():
      break
    with open(f, 'r') as f:
      logging.info('File: {}'.format(RAW_DATA.format(n=i)))
      raw_data = json.load(f)
      parse(raw_data, i)
    i += 1
  logging.info('Done!')

  # Parse additional deceased/recovered info not in raw_data 1 and 2
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing deaths_recoveries...')
  for i in [1, 2]:
    f = INPUT_DIR / OUTCOME_DATA.format(n=i)
    with open(f, 'r') as f:
      logging.info('File: {}'.format(OUTCOME_DATA.format(n=i)))
      raw_data = json.load(f)
      parse_outcome(raw_data, i)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('Adding district data for 26th April...')
  # Parse gospel district data for 26th April
  with open(DISTRICT_DATA_GOSPEL, 'r') as f:
    logging.info('File: {}'.format(DISTRICT_DATA_GOSPEL.name))
    reader = csv.DictReader(f)
    parse_district_gospel(reader)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing ICMR test data for India...')
  f = ICMR_TEST_DATA
  with open(f, 'r') as f:
    logging.info('File: {}'.format(ICMR_TEST_DATA.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    parse_icmr(raw_data)
  logging.info('Done!')
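  # Note: the raw_data loop above discovers input files by incrementing n
  # until the next numbered file is missing, so the files must be numbered
  # consecutively; a gap in the sequence would silently truncate the input
  # set.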
  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing test data for all states...')
  f = STATE_TEST_DATA
  with open(f, 'r') as f:
    logging.info('File: {}'.format(STATE_TEST_DATA.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    parse_state_test(raw_data)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('Parsing test data for districts...')
  f = DISTRICT_TEST_DATA
  with open(f, 'r') as f:
    logging.info('File: {}'.format(DISTRICT_TEST_DATA.name))
    reader = csv.reader(f)
    parse_district_test(reader)
  logging.info('Done!')

  # Fill delta values for tested
  logging.info('-' * PRINT_WIDTH)
  logging.info('Generating daily tested values...')
  fill_tested()
  logging.info('Done!')

  # Generate total (cumulative) data points till 26th April
  logging.info('-' * PRINT_WIDTH)
  logging.info('Generating cumulative CRD values till 26th April...')
  accumulate(end_date=GOSPEL_DATE)
  logging.info('Done!')

  # Fill Unknown district counts for 26th April
  logging.info('-' * PRINT_WIDTH)
  logging.info(
      'Filling {} data for 26th April...'.format(UNKNOWN_DISTRICT_KEY))
  fill_gospel_unknown()
  logging.info('Done!')

  # Generate rest of total (cumulative) data points
  logging.info('-' * PRINT_WIDTH)
  logging.info('Generating cumulative CRD values from 26th April onwards...')
  accumulate(start_after_date=GOSPEL_DATE)
  logging.info('Done!')

  # Strip empty values ({}, 0, '', None)
  logging.info('-' * PRINT_WIDTH)
  logging.info('Stripping empty values...')
  data = stripper(data)
  logging.info('Done!')

  # Add population figures
  logging.info('-' * PRINT_WIDTH)
  logging.info('Adding state/district populations...')
  add_populations()
  logging.info('Done!')

  # Generate timeseries
  logging.info('-' * PRINT_WIDTH)
  logging.info('Generating timeseries...')
  generate_timeseries(districts=False)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('Adding state and district metadata...')
  f = STATE_WISE
  with open(f, 'r') as f:
    logging.info('File: {}'.format(STATE_WISE.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    add_state_meta(raw_data)

  f = DISTRICT_WISE
  with open(f, 'r') as f:
    logging.info('File: {}'.format(DISTRICT_WISE.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    add_district_meta(raw_data)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('Dumping APIs...')
  OUTPUT_MIN_DIR.mkdir(parents=True, exist_ok=True)

  # Dump prettified full data json
  fn = '{}-{}'.format(OUTPUT_DATA_PREFIX, 'all')
  with open((OUTPUT_DIR / fn).with_suffix('.json'), 'w') as f:
    json.dump(data, f, indent=2, sort_keys=True)
  # Dump minified full data
  with open((OUTPUT_MIN_DIR / fn).with_suffix('.min.json'), 'w') as f:
    json.dump(data, f, separators=(',', ':'), sort_keys=True)
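  # The two dumps above differ only in formatting: indent=2 writes the
  # prettified API, while separators=(',', ':') drops the spaces of json's
  # default (', ', ': ') separators to minify the payload, e.g.
  # '{"a": 1, "b": 2}' becomes '{"a":1,"b":2}'. The per-date dumps below
  # follow the same pattern, with the latest date written without a date
  # suffix.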
  # Split data and dump separate json for each date
  for i, date in enumerate(sorted(data)):
    curr_data = data[date]
    if i < len(data) - 1:
      fn = '{}-{}'.format(OUTPUT_DATA_PREFIX, date)
    else:
      fn = OUTPUT_DATA_PREFIX

    with open((OUTPUT_DIR / fn).with_suffix('.json'), 'w') as f:
      json.dump(curr_data, f, indent=2, sort_keys=True)
    # Minified
    with open((OUTPUT_MIN_DIR / fn).with_suffix('.min.json'), 'w') as f:
      json.dump(curr_data, f, separators=(',', ':'), sort_keys=True)

  # Dump timeseries json
  with open((OUTPUT_DIR / OUTPUT_TIMESERIES_FILENAME).with_suffix('.json'),
            'w') as f:
    json.dump(timeseries, f, indent=2, sort_keys=True)
  with open(
      (OUTPUT_MIN_DIR / OUTPUT_TIMESERIES_FILENAME).with_suffix('.min.json'),
      'w') as f:
    json.dump(timeseries, f, separators=(',', ':'), sort_keys=True)

  logging.info('Done!')

  # Tally final date counts with statewise API
  logging.info('-' * PRINT_WIDTH)
  logging.info('Comparing data with statewise sheet...')
  f = STATE_WISE
  with open(f, 'r') as f:
    logging.info('File: {}'.format(STATE_WISE.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    tally_statewise(raw_data)
  logging.info('Done!')

  # Tally final date counts with districtwise API
  logging.info('-' * PRINT_WIDTH)
  logging.info('Comparing data with districtwise sheet...')
  f = DISTRICT_WISE
  with open(f, 'r') as f:
    logging.info('File: {}'.format(DISTRICT_WISE.name))
    raw_data = json.load(f, object_pairs_hook=OrderedDict)
    tally_districtwise(raw_data)
  logging.info('Done!')

  logging.info('-' * PRINT_WIDTH)
  logging.info('{:{align}{width}}'.format('PARSER V3 END',
                                          align='^',
                                          width=PRINT_WIDTH))
  logging.info('-' * PRINT_WIDTH)
--------------------------------------------------------------------------------