├── .gitignore ├── LICENSE ├── README.md ├── TravelHistoryDataValidation.ipynb ├── city_name.py ├── data ├── India-Forecast.csv ├── Italy-Forecast.csv ├── NHP 2018-pages-266-277.pdf ├── South Korea-Forecast.csv ├── SouthKorea-Forecast.csv ├── Timetable_Karnataka.pdf ├── countries_acronym_aliases.json ├── countries_acronym_aliases_flattened.json └── prefered_labels.txt ├── environment.yml ├── facerec_automation └── face_rec_demo.py ├── geocoding ├── README.md ├── generate_pincode.py ├── geocode.py ├── logger.py ├── merge_csv.py ├── requirements.txt └── test.py ├── history.json ├── nbs_govt_estimations ├── .gitignore ├── 1_Assam_DistrictAnalysis_20200330.ipynb ├── 1_DistrictAnalysisForKA_20200327.ipynb ├── 2020-04-04-SEIRSKA.ipynb ├── 2020-04-06-Backtesting-Karnataka.ipynb ├── 2_DistrictAnalysisForKA_20200327.ipynb ├── AssamTravelHistory.xlsx ├── Assam_ActNow.csv ├── BusJourneys.ipynb └── KSRTCBusTimetable.csv ├── nbs_healthcare ├── HealthcareCapacityModeling_StateWiseForICU.ipynb ├── HealthcareCapacity_HospitalsNBedCounts_India.ipynb ├── HealthcareCapacity_StatewiseBedCounts.ipynb └── NHP 2018.pdf ├── nbs_scratchpad ├── 2020-04-06_Plotting_Test_Curves.ipynb ├── CitiesIndia.ipynb ├── Countries.ipynb ├── CovidLocationClearance.ipynb ├── DataTransforms_JHUDataFormat.ipynb ├── ProphetEstimates.ipynb ├── States-BedCounts.ipynb ├── TestingBiasIndiaCaseCount.ipynb ├── TravelHistory.ipynb ├── VisualizeTravelHistory.ipynb └── port-of-origin-of-journey.ipynb ├── relationship_extractor_notes_transcriber.md ├── relationship_server.py ├── requirements.txt └── test_relationship_server.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac 2 | .DS_Store 3 | 4 | # Data 5 | *.csv 6 | *.xlsx 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | mess.py 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nirant, Meghana and other Contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # coronaIndia [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/NirantK/coronaIndia/master?urlpath=%2Fvoila%2Frender%2FVisualizeTravelHistory.ipynb) 2 | 3 | Please find the How To, and Dev Notes for the NLP API used by covid19india.org here: [relationship_extractor](./relationship_extractor_notes_transcriber.md) -------------------------------------------------------------------------------- /TravelHistoryDataValidation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import random\n", 11 | "from pathlib import Path\n", 12 | "\n", 13 | "# !pip install camelot\n", 14 | "# import camelot\n", 15 | "\n", 16 | "# !pip install lxml\n", 17 | "import pandas as pd\n", 18 | "\n", 19 | "%load_ext autoreload\n", 20 | "%autoreload 2\n", 21 | "Path.ls = lambda x: list(x.iterdir())" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "covid19india_mirror_url = \"https://api.steinhq.com/v1/storages/5e737e60b88d3d04ae0815b8/Sheet1\"\n", 31 | "ka_url = \"https://api.steinhq.com/v1/storages/5e7c53b3b88d3d04ae0815f0/KAPatientTravelHistory\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df = pd.read_json(ka_url)\n", 41 | "df.columns = [c.strip() for c in df.columns] # remove newline, space etc noise" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Location Verification" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "def maps_link_parser(link: str):\n", 58 | " if link is None:\n", 59 | " return [None, None, None]\n", 60 | " if link.strip().upper() == \"UNKNOWN\":\n", 61 | " return [None, None, None]\n", 62 | " split_link = link.split(\"/\")\n", 63 | "# print(split_link)\n", 64 | " name_place = split_link[5].replace(\"+\", \" \")\n", 65 | " try:\n", 66 | " lat, long, _ = split_link[6].replace(\"@\", \"\").split(\",\")\n", 67 | " except:\n", 68 | " return [\"ParsingError\", \"ParsingError\", \"ParsingError\"]\n", 69 | " return [name_place, lat, long]\n", 70 | "\n", 71 | "place_name, place_lat, place_long = maps_link_parser(df[\"GoogleMapsLink\"][2])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": { 78 | "scrolled": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "new_df = df[\"GoogleMapsLink\"].apply(maps_link_parser)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 8, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "new_df = pd.DataFrame(new_df.tolist(), columns=[\"PlaceName\", \"Latitude\", \"Longitude\"])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "
\n", 103 | "\n", 116 | "\n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
PlaceNameLatitudeLongitude
0Austin-Bergstrom International Airport30.1974711-97.6663529
1John F. Kennedy International Airport40.6413151-73.7803278
2Dubai International Airport25.253179355.3634841
3Kempegowda International Airport Bengaluru13.1986477.7044041
4Whitefield, Bengaluru, Karnataka12.964608777.7190231
............
134Thanisandra, Bengaluru, Karnataka13.058203377.6159807
135Thanisandra, Bengaluru, Karnataka13.058203377.6159807
136Rahmath Nagar, RT Nagar, Bengaluru, Karnataka ...13.018604477.5920903
137ESI Hospital12.971185477.6347212
138Jayanagar General Hospital12.926237777.5906121
\n", 194 | "

139 rows × 3 columns

\n", 195 | "
" 196 | ], 197 | "text/plain": [ 198 | " PlaceName Latitude \\\n", 199 | "0 Austin-Bergstrom International Airport 30.1974711 \n", 200 | "1 John F. Kennedy International Airport 40.6413151 \n", 201 | "2 Dubai International Airport 25.2531793 \n", 202 | "3 Kempegowda International Airport Bengaluru 13.19864 \n", 203 | "4 Whitefield, Bengaluru, Karnataka 12.9646087 \n", 204 | ".. ... ... \n", 205 | "134 Thanisandra, Bengaluru, Karnataka 13.0582033 \n", 206 | "135 Thanisandra, Bengaluru, Karnataka 13.0582033 \n", 207 | "136 Rahmath Nagar, RT Nagar, Bengaluru, Karnataka ... 13.0186044 \n", 208 | "137 ESI Hospital 12.9711854 \n", 209 | "138 Jayanagar General Hospital 12.9262377 \n", 210 | "\n", 211 | " Longitude \n", 212 | "0 -97.6663529 \n", 213 | "1 -73.7803278 \n", 214 | "2 55.3634841 \n", 215 | "3 77.7044041 \n", 216 | "4 77.7190231 \n", 217 | ".. ... \n", 218 | "134 77.6159807 \n", 219 | "135 77.6159807 \n", 220 | "136 77.5920903 \n", 221 | "137 77.6347212 \n", 222 | "138 77.5906121 \n", 223 | "\n", 224 | "[139 rows x 3 columns]" 225 | ] 226 | }, 227 | "execution_count": 9, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "new_df" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 10, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "df[\"PlaceName\"] = new_df[\"PlaceName\"]\n", 243 | "df[\"Lat\"] = new_df.Latitude\n", 244 | "df[\"Long\"] = new_df.Longitude" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## DateTime Parsing" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 11, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "from dateutil.parser import parse\n", 261 | "from datetime import datetime\n", 262 | "def insert(source_str, insert_str, pos):\n", 263 | " return source_str[:pos]+insert_str+source_str[pos:]\n", 264 | "\n", 265 | "def parse_date_time(date_time_str:str)->datetime:\n", 266 | " if date_time_str is None:\n", 267 | " return None\n", 268 | "# print(date_time_str)\n", 269 | " split_str = date_time_str.split()\n", 270 | " if len(split_str) == 1:\n", 271 | " return parse(date_time_str)\n", 272 | " date_str = split_str[0]\n", 273 | " time_str = insert(split_str[1], \":\", 2)\n", 274 | " return parse(date_str + \" \" + time_str)\n", 275 | " \n", 276 | "# sample_date_time = df[\"From\"][11]\n", 277 | "# sample_date_time, parse_date_time(sample_date_time)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 12, 283 | "metadata": { 284 | "scrolled": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "df[\"To\"] = df[\"To\"].apply(parse_date_time)\n", 289 | "df[\"From\"] = df[\"From\"].apply(parse_date_time)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 16, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "fname = ka_url.split(\"/\")[-1]\n", 299 | "df.to_csv(f\"{fname}.csv\", index=False)" 300 | ] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python 3", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.7.5" 320 | } 321 | }, 322 | "nbformat": 4, 323 | "nbformat_minor": 2 324 | 
}
325 | 
--------------------------------------------------------------------------------
/city_name.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | from flask import Flask, Response, abort, jsonify, request
4 | from fuzzywuzzy import process
5 | 
6 | with open(Path("prefered_labels.txt").resolve(), "r") as f:
7 |     prefered_labels = f.readlines()
8 | 
9 | 
10 | def correct_text(text: str) -> str:
11 |     """Fuzzy-matches an input place label against the preferred labels list.
12 | 
13 |     Arguments:
14 |         text {str} -- input label for the place name
15 | 
16 |     Returns:
17 |         str -- the closest fuzzy string match to the input if its match
18 |         score is greater than 85, else the same input str
19 |     """
20 |     if not text:
21 |         return ""  # nothing to match; an empty string keeps the caller's .replace() safe
22 |     output = {}
23 |     match, score = process.extractOne(text, prefered_labels)
24 |     output[match] = score
25 |     if len(text.split()) > 1:
26 |         # also try the first word alone, keyed by its best match but scored on the full text
27 |         first_word_match = process.extractOne(text.split()[0], prefered_labels)[0]
28 |         output[first_word_match] = process.extractOne(text, prefered_labels)[1]
29 | 
30 |     # a score of 100 is an exact match and anything above 85 is close enough,
31 |     # so a single threshold covers both of the original branches
32 |     for key, value in output.items():
33 |         if value > 85:
34 |             return key
35 |     return text
36 | 
37 | 
38 | app = Flask(__name__)
39 | 
40 | 
41 | @app.route("/city_name", methods=["POST"])
42 | def correct_city_name():
43 |     try:
44 |         req_data = request.get_json()
45 |         results = {
46 |             "correct-port-of-origin-of-journey": correct_text(
47 |                 req_data["port-of-origin-of-journey"]
48 |             ).replace("\n", "")
49 |         }
50 |     except TypeError:
51 |         # abort when the request body is not JSON
52 |         abort(400)
53 |     except KeyError:
54 |         # return an error when the expected parameter is missing
55 |         return jsonify(error="Not the correct request format!")
56 |     return jsonify(results)
57 | 
58 | if __name__ == "__main__":
59 |     app.run()
60 | 
--------------------------------------------------------------------------------
/data/India-Forecast.csv:
--------------------------------------------------------------------------------
1 | yhat
2 | 3
3 | 1
4 | 1
5 | 2
6 | 6
7 | 5
8 | 3
9 | 3
10 | 1
11 | 0
12 | 0
13 | 3
14 | 3
15 | 2
16 | 4
17 | 5
18 | 8
19 | 12
20 | 21
21 | 25
22 | 28
23 | 34
24 | 39
25 | 44
26 | 52
27 | 64
28 | 74
29 | 85
30 | 103
31 | 123
32 | 149
33 | 185
34 | 233
35 | 288
36 | 353
37 | 436
38 | 531
39 | 644
40 | 777
41 | 932
42 | 1101
43 | 1289
44 | 1499
45 | 1725
46 | 1968
47 | 2228
48 | 2503
49 | 2784
50 | 3068
51 | 3358
52 | 
--------------------------------------------------------------------------------
/data/Italy-Forecast.csv:
--------------------------------------------------------------------------------
1 | yhat
2 | -40
3 | 57
4 | 105
5 | -104
6 | -45
7 | 27
8 | -50
9 | 87
10 | 246
11 | 366
12 | 248
13 | 429
14 | 661
15 | 786
16 | 1174
17 | 1638
18 | 2121
19 | 2431
20 | 3112
21 | 3927
22 | 4730
23 | 5909
24 | 7293
25 | 8846
26 | 10401
27 | 12527
28 | 15013
29 | 17739
30 | 21119
31 | 25003
32 | 29374
33 | 34079
34 | 39694
35 | 46004
36 | 52878
37 | 60708
38 | 69310
39 | 78622
40 | 88430
41 | 99240
42 | 110756
43 | 122752
44 | 135515
45 | 148752
46 | 162283
47 | 175772
48 | 189605
49 | 203366
50 | 216715
51 | 229836
52 | 242340
53 | 253971
54 | 
--------------------------------------------------------------------------------
/data/NHP 2018-pages-266-277.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NirantK/coronaIndia/73099b2b6fca51caf7528e96ddad6baed8fe074f/data/NHP 2018-pages-266-277.pdf
--------------------------------------------------------------------------------
/data/South Korea-Forecast.csv:
-------------------------------------------------------------------------------- 1 | yhat 2 | -14 3 | 45 4 | 60 5 | 72 6 | 67 7 | 151 8 | 183 9 | 366 10 | 514 11 | 712 12 | 1004 13 | 1372 14 | 1901 15 | 2425 16 | 3114 17 | 3746 18 | 4369 19 | 4995 20 | 5577 21 | 6181 22 | 6630 23 | 7096 24 | 7372 25 | 7533 26 | 7634 27 | 7677 28 | 7794 29 | 7878 30 | 8179 31 | 8568 32 | 9198 33 | 10195 34 | 11629 35 | 13679 36 | 16275 37 | 19682 38 | 23767 39 | 28653 40 | 34412 41 | 41034 42 | 48596 43 | 56900 44 | 66065 45 | 75793 46 | 86028 47 | 96657 48 | 107479 49 | 118385 50 | 128997 51 | 139272 52 | 148767 53 | 157312 54 | -------------------------------------------------------------------------------- /data/SouthKorea-Forecast.csv: -------------------------------------------------------------------------------- 1 | yhat 2 | -14 3 | 45 4 | 60 5 | 72 6 | 67 7 | 151 8 | 183 9 | 366 10 | 514 11 | 712 12 | 1004 13 | 1372 14 | 1901 15 | 2425 16 | 3114 17 | 3746 18 | 4369 19 | 4995 20 | 5577 21 | 6181 22 | 6630 23 | 7096 24 | 7372 25 | 7533 26 | 7634 27 | 7677 28 | 7794 29 | 7878 30 | 8179 31 | 8568 32 | 9198 33 | 10195 34 | 11629 35 | 13679 36 | 16275 37 | 19682 38 | 23767 39 | 28653 40 | 34412 41 | 41034 42 | 48596 43 | 56900 44 | 66065 45 | 75793 46 | 86028 47 | 96657 48 | 107479 49 | 118385 50 | 128997 51 | 139272 52 | 148767 53 | 157312 54 | -------------------------------------------------------------------------------- /data/Timetable_Karnataka.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/coronaIndia/73099b2b6fca51caf7528e96ddad6baed8fe074f/data/Timetable_Karnataka.pdf -------------------------------------------------------------------------------- /data/prefered_labels.txt: -------------------------------------------------------------------------------- 1 | Stuttgart 2 | Guinea 3 | Lagos 4 | Barbados 5 | Houston 6 | New Zealand 7 | Delhi 8 | Antananarivo 9 | Hague 10 | Heathrow 11 | Alaska 12 | Mauritius 13 | Iowa 14 | Finland 15 | Bakersfield 16 | Charlotte 17 | Salvador 18 | Surabaya 19 | Curacao 20 | Maryland 21 | Uttarakhand 22 | Maarten 23 | Australia 24 | Sweden 25 | Baghdad 26 | Tallassee 27 | Burundi 28 | Toronto 29 | Los Angeles 30 | Chicago 31 | Korea 32 | Turkey 33 | Manila 34 | Stockholm 35 | Orlando 36 | Austrailia 37 | Nagoya 38 | Cancun 39 | Michigan 40 | Zurich 41 | Bishkek 42 | Egypt 43 | Seatle 44 | Zimbabwe 45 | Bogota 46 | Milwaukee 47 | Netherlands 48 | Warsaw 49 | Gemany 50 | Barcelona 51 | Greece 52 | Cayman 53 | Dakar 54 | Omaha 55 | Milan 56 | Auckland 57 | Bangalore 58 | Abidjan 59 | York 60 | Ababa 61 | Udupi 62 | Bhutan 63 | Bergen 64 | Perth 65 | Pattaya 66 | Canberra 67 | Leeds 68 | Cincinnati 69 | Bonn 70 | Bahamas 71 | Toulouse 72 | Istanbul 73 | Durban 74 | Lauderdale 75 | Munich 76 | Banglore 77 | Phoenix 78 | Bankok 79 | Madagascar 80 | Tulsa 81 | Minnesota 82 | New York 83 | Honduras 84 | Pittsburg 85 | Rwanda 86 | Karnataka 87 | Palembang 88 | Luxembourg 89 | Dfw 90 | Hanoi 91 | Riga 92 | Polad 93 | Guatemala 94 | Canada 95 | Tokyo 96 | Madrid 97 | Mumbai 98 | Manama 99 | Lucknow 100 | Singapore 101 | America 102 | Serbia 103 | Ecuador 104 | New Castle 105 | Iran 106 | Romania 107 | UK 108 | California 109 | Brussels 110 | Beijing 111 | Frankfort 112 | Memphis 113 | Settale 114 | Prague 115 | Jakarta 116 | Italy 117 | Arlanda 118 | Sydney 119 | Florida 120 | Belize 121 | Hong 122 | Aberdeen 123 | Cleveland 124 | Peoria 125 | Peru 126 | Lumpur 127 | Siem 128 | China 129 | England 
130 | Arizona 131 | Poland 132 | Vietnam 133 | Philippines 134 | Geneva 135 | Vegas 136 | Chile 137 | Lisbon 138 | Marseille 139 | Inverness 140 | Lome 141 | Basel 142 | Manchester 143 | Jodhpur 144 | Pittsburgh 145 | Texas 146 | Goa 147 | Seatlle 148 | Cardiff 149 | Panama 150 | Afghanistan 151 | Massachusetts 152 | Gothenburg 153 | Narobi 154 | Vienna 155 | Austraila 156 | Abuja 157 | Jacksonville 158 | Melbourne 159 | Kathmandu 160 | Birmingham 161 | Taipei 162 | Narita 163 | Amsterdam 164 | Edinburg 165 | Sudan 166 | Atlawat 167 | Dallas 168 | Ireland 169 | Bucharest 170 | Zambia 171 | Chester 172 | Ethiopia 173 | Shanghai 174 | Rajkot 175 | Kabul 176 | Madras 177 | Malawi 178 | Seoul 179 | Angola 180 | Nigeria 181 | Bentonville 182 | Hannover 183 | Kayseri 184 | Namibia 185 | Paris 186 | Qutar 187 | Bristol 188 | Santiago 189 | Switzerland 190 | Luxemburg 191 | Norway 192 | Taiwan 193 | Belfast 194 | Mangalore 195 | Indianapolis 196 | London 197 | Irag 198 | Yangon 199 | Berlin 200 | Muscat 201 | Louisiana 202 | Phillippines 203 | Colombo 204 | Yerevan 205 | Kansas 206 | Madinah 207 | Baku 208 | Brisbane 209 | Montreal 210 | Porto 211 | Armenia 212 | Austria 213 | Doha 214 | Brunei 215 | Washington 216 | Maputo 217 | Nashville 218 | Krakow 219 | Edinburgh 220 | Denver 221 | Oman 222 | Sharjah 223 | Maimi 224 | Oslo 225 | Sofia 226 | Senegal 227 | Lanka 228 | Hungary 229 | Dubai 230 | Ghana 231 | Dusseldorf 232 | Indonesia 233 | Basra 234 | Windsor 235 | Dublin 236 | Wellington 237 | Glasgow 238 | Phnom 239 | Budapest 240 | Iraq 241 | Ukraine 242 | Guadalajara 243 | Tampa 244 | Luxembourg 245 | Portland 246 | Dammam 247 | Phillipines 248 | Batam 249 | Penh 250 | Kong 251 | Bejing 252 | Swizerland 253 | Minnea 254 | Uganda 255 | Windsor 256 | Glasgow 257 | Quito 258 | Accra 259 | Srinagar 260 | Belgium 261 | Helsinki 262 | Fiji 263 | Moresby 264 | Cambodia 265 | Bangkok 266 | Edmonton 267 | Cairo 268 | Moscow 269 | Denmark 270 | Kuala Lumpur 271 | Belgrade 272 | Minneapolis 273 | New Orleans 274 | Myanmar 275 | Lebanon 276 | Kansai 277 | Johannesburg 278 | Ethopia 279 | Morocco 280 | Greenville 281 | Aires 282 | Addis 283 | Tanzania 284 | India 285 | Qatar 286 | Bahrain 287 | Brazil 288 | Gaborone 289 | Tahiti 290 | Philadelphia 291 | Chennai 292 | Raliegh 293 | Bulgaria 294 | Thailand 295 | Wichita 296 | Botswana 297 | Gdansk 298 | Miami 299 | Germany 300 | Phuket 301 | Belarus 302 | Angeles 303 | Curitiba 304 | Scotland 305 | Kuala 306 | Nyc 307 | Nairobi 308 | Mozambique 309 | Portugal 310 | Ville 311 | Izmir 312 | Philipines 313 | Altanta 314 | Yamunanagar 315 | Srilanka 316 | Valencia 317 | Pradesh 318 | Cincinatti 319 | Dresden 320 | Ohio 321 | Nuremburg 322 | Spain 323 | Christchurch 324 | Buenos 325 | Anchorage 326 | Paulo 327 | Detroit 328 | Nz 329 | Kenya 330 | Malaysia 331 | Osaka 332 | Argentina 333 | Vilnius 334 | Cebu 335 | Japan 336 | Medan 337 | Sandiego 338 | Lesotho 339 | Adelaide 340 | Vancouver 341 | Frankfurt 342 | Seattle 343 | Boston 344 | Bilbao 345 | Tallahassee 346 | Aruba 347 | France 348 | Nepal 349 | Ottawa 350 | Hyderabad 351 | Virginia 352 | Baltimore 353 | Rome 354 | Jeddah 355 | Mexico 356 | Jaisalmer 357 | Amman 358 | Somalia 359 | Athens 360 | Calgary 361 | Liberia 362 | Hawaii 363 | Monrovia 364 | Kuwait 365 | Penang 366 | Tauranga 367 | Dhaka 368 | Hongkong 369 | Laos 370 | Beirut 371 | Tbilisi 372 | Russia 373 | Bremen 374 | Kiev 375 | Malasia 376 | Antigua 377 | Brasil 378 | Atlanta 379 | Stavanger 380 | Erbil 381 | Algeria 382 | Copenhagen 383 | 
Denpasar 384 | Georgia 385 | Singapura 386 | Rica 387 | Hartford 388 | Capetown 389 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: coronaIndia 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - geojson 6 | - geopy 7 | - ipyleaflet>=0.11.1 8 | - ipympl 9 | - voila 10 | - pip 11 | - pip: 12 | - "-r requirements.txt" 13 | -------------------------------------------------------------------------------- /facerec_automation/face_rec_demo.py: -------------------------------------------------------------------------------- 1 | # from https://www.codepile.net/pile/ZJO0Gwaj 2 | 3 | # Python packages required: face_recognition, flask 4 | # Requires cmake. On mac `brew install cmake` 5 | # Package works on dlib 6 | # curl -XPOST -F "file=@/Users/maneesh/Downloads/obama.jpg" http://127.0.0.1:5001 7 | # { 8 | # "face_found_in_image": true 9 | # } 10 | # TODO: Batch run 11 | # TODO: Resize image 12 | 13 | 14 | import face_recognition 15 | from flask import Flask, jsonify, request, redirect 16 | import json 17 | import glob 18 | 19 | # files = glob.glob("/Users/maneesh/Projects/covid/face_recognition/lfw/*.jpg") 20 | 21 | 22 | app = Flask(__name__) 23 | 24 | ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'} 25 | 26 | def allowed_file(filename): 27 | return '.' in filename and \ 28 | filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS 29 | 30 | @app.route('/', methods=['GET', 'POST']) 31 | def upload_image(): 32 | # Check if a valid image file was uploaded 33 | if request.method == 'POST': 34 | if 'file' not in request.files: 35 | return redirect(request.url) 36 | 37 | file = request.files['file'] 38 | 39 | if file.filename == '': 40 | return redirect(request.url) 41 | 42 | if file and allowed_file(file.filename): 43 | # The image file seems valid! Detect faces and return the result. 44 | return detect_face(file) 45 | 46 | # If no valid image file was uploaded, show the file upload form: 47 | return ''' 48 | 49 | Covid Face Detection Demo 50 |

<h1>Upload a picture and check if it's a face.</h1>
51 |     <form method="POST" enctype="multipart/form-data">
52 |       <input type="file" name="file">
53 |       <input type="submit" value="Upload">
54 |     </form>
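    <p>POST an image file to this page; the reply is JSON such as {"face_found_in_image": true}, as in the sample curl call in the header comments.</p>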
55 |     '''
56 | 
57 | 
58 | def detect_face(file):
59 |     if request.method == 'POST':
60 |         img = face_recognition.load_image_file(file)
61 |         unknown_face_encodings = face_recognition.face_encodings(img)
62 |         face_found = len(unknown_face_encodings) > 0
63 |         result = {
64 |             "face_found_in_image": face_found
65 |         }
66 |         return jsonify(result)
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     app.run(host='0.0.0.0', port=5001, debug=True)
71 | 
--------------------------------------------------------------------------------
/geocoding/README.md:
--------------------------------------------------------------------------------
1 | # Steps to geocode -
2 | 
3 | 1. `pip install -r requirements.txt` (or at least `pip install -U googlemaps fire xlrd pandas tqdm loguru`, which the scripts import)
4 | 2. Run the script `python geocode.py --data_file=ReportedTravel.xlsx --api_key=1234`
--------------------------------------------------------------------------------
/geocoding/generate_pincode.py:
--------------------------------------------------------------------------------
1 | from logger import get_logger
2 | import math
3 | import pathlib
4 | 
5 | import fire
6 | import googlemaps
7 | import pandas as pd
8 | from tqdm import tqdm
9 | 
10 | logging = get_logger("pincode_generation_logs")
11 | 
12 | logging.info("Logger Setup Complete")
13 | 
14 | 
15 | def send_reverse_geocoding_request(lat, lng, gmaps):
16 |     """Sends a request to the GMaps API to reverse geocode the given point.
17 | 
18 |     Args:
19 |         lat (float): Latitude
20 |         lng (float): Longitude
21 |         gmaps (Object): gmaps client object with a declared API key
22 | 
23 |     Returns:
24 |         dict: Response received from the API,
25 |         bool: Whether we could connect to the API
26 |     """
27 |     valid_data_received = True
28 |     try:
29 |         reverse_geocode_result = gmaps.reverse_geocode((lat, lng))
30 |     except Exception:
31 |         logging.warning(f"Failure in reaching the API - {(lat, lng)}")
32 |         valid_data_received = False
33 |         reverse_geocode_result = None
34 |     return reverse_geocode_result, valid_data_received
35 | 
36 | 
37 | def retrieve_pincodes_from_response(reverse_geocode_result):
38 |     """Takes the raw response from the API and gathers all the possible pincodes it returned.
39 | 
40 |     Args:
41 |         reverse_geocode_result (dict): Response from the GMaps API
42 | 
43 |     Returns:
44 |         List: List of all the possible pincodes
45 |     """
46 |     codes = []
47 |     # Each result carries its own "address_components"; collect every postal_code entry.
48 |     for result in reverse_geocode_result:
49 |         components = result["address_components"]
50 |         for component in components:
51 |             if "postal_code" in component["types"]:
52 |                 codes.append(component["long_name"])
53 |     return codes
54 | 
55 | 
56 | def reverse_geocode(csv_sheet, api_key, output_file="AssamPincode.csv"):
57 |     """Uses the GMaps Geocoding API to find the pincode for each latitude/longitude pair.
58 |     Writes the result out as a new CSV.
59 | 
60 |     Args:
61 |         csv_sheet (str): CSV sheet that needs to be reverse geocoded for pincodes
62 |         api_key (str): API key to access the GMaps API
63 |         output_file (str, optional): Name of the new CSV to be stored as. Defaults to 'AssamPincode.csv'.
64 | """ 65 | gmaps = googlemaps.Client(key=api_key) 66 | csv_sheet = pathlib.Path(csv_sheet) 67 | csv_sheet = csv_sheet.resolve() 68 | df = pd.read_csv(csv_sheet) 69 | lats, lngs = df["Latitude"].tolist(), df["Longitude"].tolist() 70 | postal_codes, pincode_verify = [], [] 71 | is_assam = [] 72 | nocode_count, nancount, success = 0, 0, 0 73 | 74 | def verify_ifnan(lat, lng): 75 | """Simply verifies if the given lat, lng is nan or not. 76 | If nan. Appends the neccessary content to three lists 77 | 78 | Args: 79 | lat (float): Latitude 80 | lng (float): Longitude 81 | 82 | Returns: 83 | Boolean : True if the sent lat, lng were nan 84 | """ 85 | if math.isnan(lat) or math.isnan(lng): 86 | pincode_verify.append(True) 87 | postal_codes.append(None) 88 | is_assam.append(None) 89 | logging.info("Could not find pincode for NaN values") 90 | nancount += 1 91 | return True 92 | return False 93 | 94 | def get_required_code(codes): 95 | """Takes the List of all the possible pincodes and selects the pincode for Assam 96 | 97 | TODO: Maintain a mapping of state to pincode and take the state as input. 98 | 99 | Args: 100 | codes (List): Collection of all pincodes reccieved from the API 101 | """ 102 | if len(codes) == 0: 103 | logging.warning(f"API returned no postal code value - {(lat,lng)}") 104 | postal_codes.append(None) 105 | is_assam.append(None) 106 | nocode_count += 1 107 | return 108 | for code in codes: 109 | if str(code)[0] == "7": 110 | postal_codes.append(code) 111 | is_assam.append(True) 112 | return 113 | postal_codes.append(codes[0]) 114 | is_assam.append(False) 115 | return 116 | 117 | for lat, lng in tqdm(zip(lats, lngs)): 118 | # Verify that the current lat, lng is not nan. 119 | if verify_ifnan(lat, lng): 120 | continue 121 | 122 | # Lat, Lng is not nan. It can sent to the GMaps Geocoding API for reverse geocoding. 123 | # send_reverese_geocoding_request will do that and return the raw response back. 124 | reverse_geocode_result, valid_data_received = send_reverese_geocoding_request( 125 | lat, lng, gmaps 126 | ) 127 | 128 | # Make sure that we were able to successfully connect to the GMaps API and got a valid 129 | # response back. 130 | if not valid_data_received: 131 | pincode_verify.append(False) 132 | postal_codes.append(None) 133 | is_assam.append(None) 134 | continue 135 | 136 | # Valid respoonse was srecieved after successfully connecting to the API 137 | pincode_verify.append(True) 138 | 139 | # Clean the raw response recieved from the GMaps geocoding API. Filter out all the possible pincodes 140 | # sent back by the API. Out of all the codes pick the first pincode that matches an Assam pincode 141 | # (Assam pincode - starts with '7'). If no Assam pincode found mark it false for is_assam and pick the first pincode. 142 | try: 143 | codes = retrieve_pincodes_from_response(reverse_geocode_result) 144 | get_required_code(codes) 145 | if postal_codes[-1] is None: 146 | continue 147 | success += 1 148 | except: 149 | postal_codes.append(None) 150 | nocode_count += 1 151 | logging.warning(f"API returned no postal code value - {(lat,lng)}") 152 | 153 | df["pincode"] = postal_codes 154 | df["is_assam"] = is_assam 155 | logging.info( 156 | f"API Reached. But No pincode recorded. 
153 |     df["pincode"] = postal_codes
154 |     df["is_assam"] = is_assam
155 |     logging.info(
156 |         f"API reached, but no pincode recorded - {nocode_count} rows out of {len(df)}"
157 |     )
158 |     logging.info(
159 |         f"Pincoded Addresses - {len(postal_codes)} , Success={success}, NaNCount={nancount}"
160 |     )
161 | 
162 |     if len(set(pincode_verify)) > 1:
163 |         logging.warning("There were failures when trying to reach the API.")
164 |         df["pincode_verify"] = pincode_verify
165 | 
166 |     df.to_csv(output_file)
167 | 
168 | 
169 | if __name__ == "__main__":
170 |     fire.Fire(reverse_geocode)
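# Usage sketch (the file names and key below are placeholders; adjust to your data):
#   python generate_pincode.py --csv_sheet=Sheets/Merged.csv --api_key=YOUR_GMAPS_KEY --output_file=AssamPincode.csv
# The input CSV needs the "Latitude"/"Longitude" columns that geocode.py writes; the output adds
# "pincode" and "is_assam", plus "pincode_verify" when some API calls could not be made.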
--------------------------------------------------------------------------------
/geocoding/geocode.py:
--------------------------------------------------------------------------------
1 | from logger import get_logger
2 | import pathlib
3 | 
4 | import fire
5 | import googlemaps
6 | import pandas as pd
7 | from tqdm import tqdm
8 | 
9 | logging = get_logger("geocoding_logs")
10 | 
11 | logging.info("Logger Setup Complete")
12 | 
13 | 
14 | def geocode_df(df, gmaps, use_column):
15 |     addresses = df[use_column].tolist()
16 |     count = 0
17 |     total_count = 0
18 |     lats, longs, geocoded = [], [], []
19 |     logging.info(f"Sheet has {len(addresses)} rows")
20 | 
21 |     for address in tqdm(addresses):
22 |         try:
23 |             location = gmaps.geocode(address, components={"country": "IN"})
24 |             geocoded.append(True)
25 |         except Exception:
26 |             location = None  # avoid silently reusing the previous row's result below
27 |             geocoded.append(False)
28 |             logging.warning(f"Failed to reach the API for address - {address}")
29 |         try:
30 |             location = location[0]["geometry"]["location"]
31 |             lats.append(location["lat"])
32 |             longs.append(location["lng"])
33 |         except Exception:
34 |             count += 1
35 |             lats.append(None)
36 |             longs.append(None)
37 |             logging.info(f"Could not geocode - {address} in sheet")
38 |         total_count += 1
39 | 
40 |     df["Latitude"] = lats
41 |     df["Longitude"] = longs
42 |     df["Geocoded"] = geocoded
43 | 
44 |     return df, count, total_count
45 | 
46 | 
47 | def geocode_json(data_file, gmaps, use_column):
48 |     json_name = data_file.resolve()
49 |     df = pd.read_json(json_name)
50 | 
51 |     total_none_count = 0
52 |     total_overall_count = 0
53 | 
54 |     logging.info(f"Starting to geocode json - {data_file}")
55 | 
56 |     df, count, total_count = geocode_df(df, gmaps, use_column)
57 |     logging.info(f"Saving {data_file} with geocoded address")
58 |     logging.info(f"Unable to geocode in current sheet - {count}")
59 |     total_none_count += count
60 |     total_overall_count += total_count
61 |     df.to_csv(f"Sheets/{data_file.stem}.csv", index=False)  # data_file is a pathlib.Path, so use its stem for the output name
62 | 
63 |     logging.info(f"Total Addresses - {total_overall_count}")
64 |     logging.info(f"Total Addresses unable to geocode - {total_none_count}")
65 | 
66 | 
67 | def geocode_excel_sheet(data_file, gmaps, use_column):
68 |     xlsx_name = data_file.resolve()
69 |     xl_pd_buffer = pd.ExcelFile(xlsx_name)
70 |     sheet_names = xl_pd_buffer.sheet_names
71 | 
72 |     total_xlsx_count = 0
73 |     total_none_count = 0
74 | 
75 |     logging.info(f"Geocoding a total of - {len(sheet_names)} sheets")
76 | 
77 |     for sheet_name in sheet_names[:1]:  # NOTE: only the first sheet is geocoded; drop "[:1]" to process every sheet
78 |         df = pd.read_excel(xlsx_name, sheet_name=sheet_name)
79 |         df, count, total_count = geocode_df(df, gmaps, use_column)
80 |         logging.info(f"Saving {sheet_name} with geocoded address")
81 |         logging.info(f"Unable to geocode - {count}")
82 |         total_none_count += count
83 |         total_xlsx_count += total_count
84 |         df.to_csv(f"Sheets/{sheet_name}.csv", index=False)
85 | 
86 |     logging.info(f"Total Addresses - {total_xlsx_count}")
87 |     logging.info(f"Total Addresses unable to geocode - {total_none_count}")
88 | 
89 | 
90 | def geocode(data_file, api_key, use_column="Address"):
91 |     """Takes an Excel or JSON sheet as input and saves a CSV after geocoding the
92 |     address mentioned by the column stated.
93 | 
94 |     Args:
95 |         data_file (str): Path to the sheet that is to be geocoded.
96 |         api_key (str): API key for the Google Maps Geocoding API
97 |         use_column (str, optional): The column that is to be used as input to the API while geocoding. Defaults to "Address".
98 |     """
99 |     (pathlib.Path.cwd() / "Sheets").mkdir(parents=True, exist_ok=True)  # make directory "Sheets" if it does not exist
100 |     gmaps = googlemaps.Client(key=api_key)
101 |     data_file = pathlib.Path(data_file)
102 | 
103 |     if data_file.suffix == ".json":
104 |         geocode_json(data_file, gmaps, use_column)
105 |     elif data_file.suffix in (".xlsx", ".xls"):
106 |         geocode_excel_sheet(data_file, gmaps, use_column)
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     fire.Fire(geocode)
--------------------------------------------------------------------------------
/geocoding/logger.py:
--------------------------------------------------------------------------------
1 | from loguru import logger
2 | import sys
3 | 
4 | def get_logger(name):
5 |     logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
6 |     logger.add(f"{name}.log")
7 |     return logger
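# Minimal usage sketch (the run name is hypothetical):
#   logging = get_logger("my_run")  # messages go to stderr and to my_run.log
#   logging.info("hello")
# Note: loguru keeps its default stderr handler, so get_logger() leaves two stderr sinks;
# call logger.remove() before the add() calls if duplicate console lines appear.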
--------------------------------------------------------------------------------
/geocoding/merge_csv.py:
--------------------------------------------------------------------------------
1 | import fire
2 | import pandas as pd
3 | from pathlib import Path
4 | 
5 | def merge_csv(sheet_folder, save_as="Merged.csv"):
6 |     """
7 |     Will combine all the CSVs formed during the geocoding process.
8 | 
9 |     Args:
10 |         sheet_folder (str): Path to the folder where all the sheets are stored
11 |         save_as (str, optional): Name of the merged CSV to save as. Defaults to "Merged.csv".
12 |     """
13 |     sheet_folder_path = Path(sheet_folder)
14 |     sheet_folder_path = sheet_folder_path.resolve()
15 |     all_csv_sheets = list(sheet_folder_path.glob('**/*.csv'))
16 | 
17 |     main_df = pd.read_csv(all_csv_sheets[0])  # Store the first sheet in main_df
18 |     main_df = main_df.drop("Geocoded", axis=1)
19 | 
20 |     total = len(main_df)
21 |     for csv_sheet in all_csv_sheets[1:]:  # Start iterating from the second sheet and keep joining.
22 |         df = pd.read_csv(csv_sheet)
23 |         df = df.drop("Geocoded", axis=1)
24 |         main_df = pd.concat([main_df, df])
25 |         total += len(df)
26 | 
27 |     print(total, len(main_df))
28 |     save_csv_path = sheet_folder_path / save_as
29 |     main_df.to_csv(save_csv_path)
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     fire.Fire(merge_csv)
34 | 
--------------------------------------------------------------------------------
/geocoding/requirements.txt:
--------------------------------------------------------------------------------
1 | appdirs==1.4.3
2 | attrs==19.3.0
3 | black==19.10b0
4 | certifi==2020.4.5.1
5 | chardet==3.0.4
6 | click==7.1.1
7 | click-plugins==1.1.1
8 | cligj==0.5.0
9 | et-xmlfile==1.0.1
10 | Fiona==1.8.13.post1
11 | fire==0.2.1
12 | geographiclib==1.50
13 | geopandas==0.7.0
14 | geopy==1.21.0
15 | googlemaps==4.2.0
16 | idna==2.9
17 | isort==4.3.21
18 | jdcal==1.4.1
19 | loguru==0.4.1
20 | munch==2.5.0
21 | numpy==1.18.2
22 | openpyxl==3.0.3
23 | pandas==1.0.3
24 | pathspec==0.7.0
25 | pynvim==0.4.1
26 | pyproj==2.6.0
27 | python-dateutil==2.8.1
28 | pytz==2019.3
29 | regex==2020.4.4
30 | requests==2.23.0
31 | Shapely==1.7.0
32 | six==1.14.0
33 | termcolor==1.1.0
34 | toml==0.10.0
35 | tqdm==4.45.0
36 | typed-ast==1.4.1
37 | urllib3==1.25.8
38 | xlrd==1.2.0
39 | XlsxWriter==1.2.8
--------------------------------------------------------------------------------
/geocoding/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | xlsx_name = "ReportedTravel.xlsx"
4 | temp = pd.ExcelFile(xlsx_name)
5 | sheet_names = temp.sheet_names
6 | 
7 | total = 0
8 | nones = 0
9 | 
10 | for sheet_name in sheet_names:
11 |     df = pd.read_csv(f"Sheets/{sheet_name}.csv")
12 |     total += len(df)
13 |     nones += len(df) - df["Latitude"].count()
14 |     if len(set(df["Geocoded"].tolist())) > 1:
15 |         print(f"Sheet - {sheet_name} was not geocoded completely.")
16 | 
17 | print(total, nones)
--------------------------------------------------------------------------------
/nbs_govt_estimations/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 
--------------------------------------------------------------------------------
/nbs_govt_estimations/1_Assam_DistrictAnalysis_20200330.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import json\n",
10 |     "import random\n",
11 |     "from pathlib import Path\n",
12 |     "\n",
13 |     "import pandas as pd\n",
14 |     "\n",
15 |     "%load_ext autoreload\n",
16 |     "%autoreload 2\n",
17 |     "Path.ls = lambda x: list(x.iterdir())"
18 |    ]
19 |   },
20 |   {
21 |    "cell_type": "code",
22 |    "execution_count": 2,
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "# !pip install --upgrade xlrd"
27 |    ]
28 |   },
29 |   {
30 |    "cell_type": "code",
31 |    "execution_count": 3,
32 |    "metadata": {},
33 |    "outputs": [],
34 |    "source": [
35 |     "assam_travel_history = Path(\"AssamTravelHistory.xlsx\").resolve()\n",
36 |     "# pd.read_excel(\"AssamTravelHistory.xlsx\", sheet_name=10)\n",
37 |     "xl = pd.ExcelFile(assam_travel_history)"
38 |    ]
39 |   },
40 |   {
41 |    "cell_type": "code",
42 |    "execution_count": 4,
43 |    "metadata": {},
44 |    "outputs": [],
45 |    "source": [
46 |     "def read_assam_excel_to_df(filename:str)->pd.DataFrame:\n",
47 |     "    xl = pd.ExcelFile(filename)\n",
48 |     "    df_list = []\n",
49 |     "    for sheet_name in
xl.sheet_names:\n", 50 | " district_df = xl.parse(sheet_name)\n", 51 | " district_df[\"District\"] = sheet_name\n", 52 | " district_df.drop(columns=[\"S.No.\"], inplace=True)\n", 53 | " df_list.append(district_df)\n", 54 | " return pd.concat(df_list, sort=False)\n", 55 | "\n", 56 | "df = read_assam_excel_to_df(assam_travel_history)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "df[\"DateOfArrival\"] = pd.to_datetime(df[\"Date of arrival\"])\n", 66 | "df[\"DateOfReturn\"] = pd.to_datetime(df[\"Date of Receipt\"])\n", 67 | "df.drop(columns=[\"Date of arrival\", \"Date of Receipt\"], inplace=True)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# len(df)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 7, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "df_copy = df\n", 86 | "df_copy[\"Inflow\"] = 1\n", 87 | "assam_traveller_count_df = df_copy.groupby(\"District\").agg({\"Inflow\": \"sum\"})\n", 88 | "assam_traveller_count_df.reset_index(inplace=True)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 8, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "def clean_district_names(dname: str):\n", 98 | " input_to_output_mapping = {\n", 99 | " \"Cacher\": \"Cachar\",\n", 100 | " \"Kamrup_M\": \"Kamrup Metropolitan\",\n", 101 | " \"Kamrup_R\": \"Kamrup\",\n", 102 | " \"KarbiAnlong\": \"Karbi Anglong\",\n", 103 | " \"Baksha\": \"Baksa\",\n", 104 | " }\n", 105 | " return input_to_output_mapping.get(dname, dname)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 9, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "assam_traveller_count_df[\"District\"] = assam_traveller_count_df.District.apply(clean_district_names)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 10, 120 | "metadata": { 121 | "scrolled": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "assam_pop_web_raw = pd.read_html(\"https://www.census2011.co.in/census/state/districtlist/assam.html\")\n", 126 | "assam_pop_web_raw = assam_pop_web_raw[0][[\"District\", \"Population\"]]\n", 127 | "assam_pop_df = assam_pop_web_raw[~(assam_pop_web_raw[\"District\"].apply(lambda x: len(x)) > 21)]\n", 128 | "# assam_pop_df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 11, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "assam_df = pd.merge(assam_pop_df, assam_traveller_count_df, on=\"District\", how=\"left\")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "# Running SIERS Plus for Assam\n", 145 | "## Challenge: Assam has zero cases right now" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 12, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# !pip install seirsplus\n", 155 | "# from https://github.com/ryansmcgee/seirsplus" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 13, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "import contextlib\n", 165 | "import io\n", 166 | "import sys\n", 167 | "\n", 168 | "@contextlib.contextmanager\n", 169 | "def nostdout():\n", 170 | " save_stdout = sys.stdout\n", 171 | " sys.stdout = io.BytesIO()\n", 172 | " yield\n", 173 | " sys.stdout = save_stdout" 174 | ] 175 | }, 176 | { 177 
| "cell_type": "code", 178 | "execution_count": 14, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from seirsplus.models import *\n", 183 | "import networkx" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 15, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "from typing import List\n", 193 | "\n", 194 | "\n", 195 | "def get_infections(\n", 196 | " initI: int = 100, initN: int = 10 ** 5, days_N: int = 21, beta: float = 2.4\n", 197 | ") -> List[int]:\n", 198 | " model = SEIRSModel(\n", 199 | " beta=beta,\n", 200 | " sigma=1 / 5.2,\n", 201 | " gamma=1 / 12.39,\n", 202 | " initN=initN,\n", 203 | " initI=initI,\n", 204 | " beta_D=0.000,\n", 205 | " mu_D=0.02,\n", 206 | " # sigma_D=1 / 4.0,\n", 207 | " # gamma_D=1 / 9.0,\n", 208 | " theta_E=0.0002,\n", 209 | " theta_I=0.002,\n", 210 | " psi_E=0.2,\n", 211 | " psi_I=1.0,\n", 212 | " )\n", 213 | " model.run(T=days_N)\n", 214 | " S = model.numS # time series of S counts\n", 215 | " E = model.numE # time series of E counts\n", 216 | " I = model.numI # time series of I counts\n", 217 | " D_E = model.numD_E # time series of D_E counts\n", 218 | " D_I = model.numD_I # time series of D_I counts\n", 219 | " R = model.numR # time series of R counts\n", 220 | " F = model.numF # time series of F counts\n", 221 | " t = model.tseries # time values corresponding to the above time series\n", 222 | " return {\"detected_exposed\": D_E, \"detected_infected\": D_I, \"model\": model, \"t\": t}" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 16, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# model = get_infections()[\"model\"]\n", 232 | "# ?model.figure_infections\n", 233 | "# model.figure_infections(\n", 234 | "# plot_percentages=False,\n", 235 | "# plot_D_E=False,\n", 236 | "# title=\"Worst Case Scenario\",\n", 237 | "# plot_E=False,\n", 238 | "# plot_I=False,\n", 239 | "# color_D_I='tab:red'\n", 240 | "# )\n", 241 | "from typing import Union" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 17, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "percent_travellers_infected = 10 ** -3\n", 251 | "unknown_to_known_travelers = 10\n", 252 | "\n", 253 | "assam_estimated_df = assam_df\n", 254 | "assam_estimated_df[\"initI\"] = (\n", 255 | " percent_travellers_infected * unknown_to_known_travelers * assam_df[\"Inflow\"]\n", 256 | ")\n", 257 | "assam_estimated_df[\"initI\"] = assam_estimated_df[\"initI\"].astype(int)\n", 258 | "assam_estimated_df[\"initN\"] = assam_estimated_df[\"Population\"].astype(int)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 18, 264 | "metadata": { 265 | "scrolled": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "def get_risk_estimates(\n", 270 | " assam_estimated_df: pd.DataFrame, scenario: str = \"best_case\"\n", 271 | ") -> Union[List, List]:\n", 272 | " days_N = 21\n", 273 | " atrisk_day14, atrisk_day21 = [], []\n", 274 | " if scenario == \"best_case\":\n", 275 | " beta = 1.8\n", 276 | " elif scenario == \"realistic\":\n", 277 | " beta = 2.7\n", 278 | " elif scenario == \"worst_case\":\n", 279 | " beta = 4.0\n", 280 | " for row in assam_estimated_df[[\"initI\", \"initN\", \"District\"]].iterrows():\n", 281 | " initI, initN, district = row[1][0], int(row[1][1]), row[1][2]\n", 282 | " # print(type(initI), type(initN))\n", 283 | " infection_results = get_infections(\n", 284 | " initI=initI, initN=initN, days_N=days_N, beta=beta\n", 285 | " 
)\n", 286 | " detected_infected = infection_results[\"detected_infected\"]\n", 287 | " day14 = int(14 * len(detected_infected) / days_N)\n", 288 | " case_count_day14 = int(infection_results[\"detected_infected\"][day14])\n", 289 | " case_count_day21 = int(infection_results[\"detected_infected\"][-1])\n", 290 | " # infection_results[\"model\"].figure_infections(\n", 291 | " # plot_percentages=False,\n", 292 | " # plot_D_E=False,\n", 293 | " # title=f\"Plausible Scenario for {district}. Population > {initN / 10**6:.2f} Million\",\n", 294 | " # side_title=\"This model is intended to help make fast decisions, not predict the future.\",\n", 295 | " # plot_E=False,\n", 296 | " # plot_I=False,\n", 297 | " # color_D_I=\"tab:red\",\n", 298 | " # )\n", 299 | " # print(case_count_day14, case_count_day21)\n", 300 | " atrisk_day14.append(case_count_day14)\n", 301 | " atrisk_day21.append(case_count_day21)\n", 302 | " return atrisk_day14, atrisk_day21" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 19, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "t = 20.90\n", 315 | "t = 20.90\n", 316 | "t = 20.90\n", 317 | "t = 20.90\n", 318 | "t = 20.90\n", 319 | "t = 20.90\n", 320 | "t = 20.90\n", 321 | "t = 20.90\n", 322 | "t = 20.90\n", 323 | "t = 20.90\n", 324 | "t = 20.90\n", 325 | "t = 20.90\n", 326 | "t = 20.90\n", 327 | "t = 20.90\n", 328 | "t = 20.90\n", 329 | "t = 20.90\n", 330 | "t = 20.90\n", 331 | "t = 20.90\n", 332 | "t = 20.90\n", 333 | "t = 20.90\n", 334 | "t = 20.90\n", 335 | "t = 20.90\n", 336 | "t = 20.90\n", 337 | "t = 20.90\n", 338 | "t = 20.90\n", 339 | "t = 20.90\n", 340 | "t = 20.90\n", 341 | "t = 20.90\n", 342 | "t = 20.90\n", 343 | "t = 20.90\n", 344 | "t = 20.90\n", 345 | "t = 20.90\n", 346 | "t = 20.90\n", 347 | "t = 20.90\n", 348 | "t = 20.90\n", 349 | "t = 20.90\n", 350 | "t = 20.90\n", 351 | "t = 20.90\n", 352 | "t = 20.90\n", 353 | "t = 20.90\n", 354 | "t = 20.90\n", 355 | "t = 20.90\n", 356 | "t = 20.90\n", 357 | "t = 20.90\n", 358 | "t = 20.90\n", 359 | "t = 20.90\n", 360 | "t = 20.90\n", 361 | "t = 20.90\n", 362 | "t = 20.90\n", 363 | "t = 20.90\n", 364 | "t = 20.90\n", 365 | "t = 20.90\n", 366 | "t = 20.90\n", 367 | "t = 20.90\n", 368 | "t = 20.90\n", 369 | "t = 20.90\n", 370 | "t = 20.90\n", 371 | "t = 20.90\n", 372 | "t = 20.90\n", 373 | "t = 20.90\n", 374 | "t = 20.90\n", 375 | "t = 20.90\n", 376 | "t = 20.90\n", 377 | "t = 20.90\n", 378 | "t = 20.90\n", 379 | "t = 20.90\n", 380 | "t = 20.90\n", 381 | "t = 20.90\n", 382 | "t = 20.90\n", 383 | "t = 20.90\n", 384 | "t = 20.90\n", 385 | "t = 20.90\n", 386 | "t = 20.90\n", 387 | "t = 20.90\n", 388 | "t = 20.90\n", 389 | "t = 20.90\n", 390 | "t = 20.90\n", 391 | "t = 20.90\n", 392 | "t = 20.90\n", 393 | "t = 20.90\n", 394 | "t = 20.90\n" 395 | ] 396 | } 397 | ], 398 | "source": [ 399 | "scenario =\"best_case\"\n", 400 | "atrisk_day14, atrisk_day21 = get_risk_estimates(assam_estimated_df, scenario = scenario)\n", 401 | "assam_estimated_df[f\"day14_{scenario}\"] = atrisk_day14\n", 402 | "assam_estimated_df[f\"day21_{scenario}\"] = atrisk_day21\n", 403 | "# -------------------------------------------------\n", 404 | "scenario =\"realistic\"\n", 405 | "atrisk_day14, atrisk_day21 = get_risk_estimates(assam_estimated_df, scenario = scenario)\n", 406 | "assam_estimated_df[f\"day14_{scenario}\"] = atrisk_day14\n", 407 | "assam_estimated_df[f\"day21_{scenario}\"] = atrisk_day21\n", 408 | "# 
-------------------------------------------------\n", 409 | "scenario =\"worst_case\"\n", 410 | "atrisk_day14, atrisk_day21 = get_risk_estimates(assam_estimated_df, scenario = scenario)\n", 411 | "assam_estimated_df[f\"day14_{scenario}\"] = atrisk_day14\n", 412 | "assam_estimated_df[f\"day21_{scenario}\"] = atrisk_day21" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 20, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "assam_estimated_df.to_csv(\"Assam_ActNow.csv\", index=False)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 21, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "# atrisk_day14, atrisk_day21" 431 | ] 432 | } 433 | ], 434 | "metadata": { 435 | "kernelspec": { 436 | "display_name": "Python 3", 437 | "language": "python", 438 | "name": "python3" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.8.2" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 2 455 | } 456 | -------------------------------------------------------------------------------- /nbs_govt_estimations/2020-04-06-Backtesting-Karnataka.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import random\n", 11 | "from pathlib import Path\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "\n", 15 | "%load_ext autoreload\n", 16 | "%autoreload 2\n", 17 | "Path.ls = lambda x: list(x.iterdir())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "state_wise_url = \"https://api.covid19india.org/raw_data.json\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": { 33 | "scrolled": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "from urllib.request import urlopen\n", 38 | "from dateutil.parser import parse\n", 39 | "from pandas import json_normalize\n", 40 | "\n", 41 | "response = urlopen(state_wise_url)\n", 42 | "json_data = response.read().decode(\"utf-8\", \"replace\")\n", 43 | "\n", 44 | "d = json.loads(json_data)[\"raw_data\"]\n", 45 | "df = json_normalize(d)\n", 46 | "df = pd.DataFrame(df)\n", 47 | "# df.date = df.date.apply(parse)" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.8.2" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 4 72 | } 73 | -------------------------------------------------------------------------------- /nbs_govt_estimations/2_DistrictAnalysisForKA_20200327.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import random\n", 11 | "from pathlib 
import Path\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "\n", 15 | "%load_ext autoreload\n", 16 | "%autoreload 2\n", 17 | "Path.ls = lambda x: list(x.iterdir())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/html": [ 28 | "
\n", 29 | "\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | "
DistrictCaseCountBusCountQTNCount
0Kalaburagi36.04
1Kodagu127.047
2Chikkaballapura40.09
3Mysuru319.0231
4Dharwad112.039
5Uttara Kannada423.023
6Dhakshina Kannada582.0756
7Udupi129.0414
8Tumkur11.078
9Shivamogga043.099
10Chikkamagaluru032.027
11Davanagere028.058
12Ballari013.044
13Raichur012.041
14Belagavi011.016
15Vijayapura05.04
16Gadag03.016
17Haveri03.05
18Hassan01.075
19Bagalkote00.026
20Bengaluru (Urban, Rural & BBMP)00.07315
21Chamrajanagara00.06
22Kolar00.06
23Kolara00.08
24Koppala00.06
25Mandya00.08
26Ramanagara00.08
\n", 244 | "
" 245 | ], 246 | "text/plain": [ 247 | " District CaseCount BusCount QTNCount\n", 248 | "0 Kalaburagi 3 6.0 4\n", 249 | "1 Kodagu 1 27.0 47\n", 250 | "2 Chikkaballapura 4 0.0 9\n", 251 | "3 Mysuru 3 19.0 231\n", 252 | "4 Dharwad 1 12.0 39\n", 253 | "5 Uttara Kannada 4 23.0 23\n", 254 | "6 Dhakshina Kannada 5 82.0 756\n", 255 | "7 Udupi 1 29.0 414\n", 256 | "8 Tumkur 1 1.0 78\n", 257 | "9 Shivamogga 0 43.0 99\n", 258 | "10 Chikkamagaluru 0 32.0 27\n", 259 | "11 Davanagere 0 28.0 58\n", 260 | "12 Ballari 0 13.0 44\n", 261 | "13 Raichur 0 12.0 41\n", 262 | "14 Belagavi 0 11.0 16\n", 263 | "15 Vijayapura 0 5.0 4\n", 264 | "16 Gadag 0 3.0 16\n", 265 | "17 Haveri 0 3.0 5\n", 266 | "18 Hassan 0 1.0 75\n", 267 | "19 Bagalkote 0 0.0 26\n", 268 | "20 Bengaluru (Urban, Rural & BBMP) 0 0.0 7315\n", 269 | "21 Chamrajanagara 0 0.0 6\n", 270 | "22 Kolar 0 0.0 6\n", 271 | "23 Kolara 0 0.0 8\n", 272 | "24 Koppala 0 0.0 6\n", 273 | "25 Mandya 0 0.0 8\n", 274 | "26 Ramanagara 0 0.0 8" 275 | ] 276 | }, 277 | "execution_count": 2, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "numbers_df = pd.read_csv(\"district_info.csv\")\n", 284 | "numbers_df" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 3, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "passenger_count = 60\n", 294 | "private_bus_to_public_bus = 50\n", 295 | "travel_risk = 1 / 10 ** 5\n", 296 | "confirmed_risk = 20\n", 297 | "qtn_risk = 1 / 10 ** 3" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 4, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "df = numbers_df" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 5, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "df[\"AtRisk\"] = (\n", 316 | " numbers_df[\"BusCount\"]\n", 317 | " * passenger_count\n", 318 | " * private_bus_to_public_bus\n", 319 | " / passenger_count\n", 320 | " * private_bus_to_public_bus\n", 321 | " * travel_risk\n", 322 | " + numbers_df[\"QTNCount\"] / qtn_risk\n", 323 | " + confirmed_risk * numbers_df[\"CaseCount\"]\n", 324 | ").astype(int)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 6, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/html": [ 335 | "
\n", 336 | "\n", 349 | "\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | "
DistrictAtRisk
20Bengaluru (Urban, Rural & BBMP)7315000
6Dhakshina Kannada756102
7Udupi414020
3Mysuru231060
9Shivamogga99001
8Tumkur78020
18Hassan75000
11Davanagere58000
1Kodagu47020
12Ballari44000
13Raichur41000
4Dharwad39020
10Chikkamagaluru27000
19Bagalkote26000
5Uttara Kannada23080
14Belagavi16000
16Gadag16000
2Chikkaballapura9080
25Mandya8000
23Kolara8000
26Ramanagara8000
21Chamrajanagara6000
22Kolar6000
24Koppala6000
17Haveri5000
0Kalaburagi4060
15Vijayapura4000
\n", 495 | "
" 496 | ], 497 | "text/plain": [ 498 | " District AtRisk\n", 499 | "20 Bengaluru (Urban, Rural & BBMP) 7315000\n", 500 | "6 Dhakshina Kannada 756102\n", 501 | "7 Udupi 414020\n", 502 | "3 Mysuru 231060\n", 503 | "9 Shivamogga 99001\n", 504 | "8 Tumkur 78020\n", 505 | "18 Hassan 75000\n", 506 | "11 Davanagere 58000\n", 507 | "1 Kodagu 47020\n", 508 | "12 Ballari 44000\n", 509 | "13 Raichur 41000\n", 510 | "4 Dharwad 39020\n", 511 | "10 Chikkamagaluru 27000\n", 512 | "19 Bagalkote 26000\n", 513 | "5 Uttara Kannada 23080\n", 514 | "14 Belagavi 16000\n", 515 | "16 Gadag 16000\n", 516 | "2 Chikkaballapura 9080\n", 517 | "25 Mandya 8000\n", 518 | "23 Kolara 8000\n", 519 | "26 Ramanagara 8000\n", 520 | "21 Chamrajanagara 6000\n", 521 | "22 Kolar 6000\n", 522 | "24 Koppala 6000\n", 523 | "17 Haveri 5000\n", 524 | "0 Kalaburagi 4060\n", 525 | "15 Vijayapura 4000" 526 | ] 527 | }, 528 | "execution_count": 6, 529 | "metadata": {}, 530 | "output_type": "execute_result" 531 | } 532 | ], 533 | "source": [ 534 | "df.sort_values(by=[\"AtRisk\"], ascending=False)[[\"District\", \"AtRisk\"]]" 535 | ] 536 | } 537 | ], 538 | "metadata": { 539 | "kernelspec": { 540 | "display_name": "Python 3", 541 | "language": "python", 542 | "name": "python3" 543 | }, 544 | "language_info": { 545 | "codemirror_mode": { 546 | "name": "ipython", 547 | "version": 3 548 | }, 549 | "file_extension": ".py", 550 | "mimetype": "text/x-python", 551 | "name": "python", 552 | "nbconvert_exporter": "python", 553 | "pygments_lexer": "ipython3", 554 | "version": "3.7.5" 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 2 559 | } 560 | -------------------------------------------------------------------------------- /nbs_govt_estimations/AssamTravelHistory.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/coronaIndia/73099b2b6fca51caf7528e96ddad6baed8fe074f/nbs_govt_estimations/AssamTravelHistory.xlsx -------------------------------------------------------------------------------- /nbs_govt_estimations/Assam_ActNow.csv: -------------------------------------------------------------------------------- 1 | District,Population,Inflow,initI,initN,day14_best_case,day21_best_case,day14_realistic,day21_realistic,day14_worst_case,day21_worst_case 2 | Nagaon,2823768,3049,30,2823768,32,728,159,4811,965,11740 3 | Dhubri,1949258,1633,16,1949258,17,394,85,2862,533,7679 4 | Sonitpur,1924110,3501,35,1924110,38,810,183,4310,1019,8836 5 | Cachar,1736617,1352,13,1736617,14,322,69,2411,438,6705 6 | Barpeta,1693622,1556,15,1693622,16,368,80,2601,495,6782 7 | Kamrup,1517542,1582,15,1517542,16,365,79,2483,488,6218 8 | Tinsukia,1327929,760,7,1327929,7,176,37,1470,244,4720 9 | Dibrugarh,1326335,3481,34,1326335,36,753,175,3453,910,6438 10 | Kamrup Metropolitan,1253938,840,8,1253938,8,199,42,1574,274,4667 11 | Karimganj,1228686,1542,15,1228686,16,360,79,2257,472,5249 12 | Sivasagar,1151050,802,8,1151050,8,199,42,1526,272,4370 13 | Jorhat,1092256,1499,14,1092256,15,335,74,2059,437,4710 14 | Golaghat,1066888,1011,10,1066888,10,244,53,1693,328,4323 15 | Lakhimpur,1042137,2043,20,1042137,21,460,104,2392,575,4829 16 | Goalpara,1008183,1043,10,1008183,10,243,53,1653,325,4133 17 | Morigaon,957423,3330,33,957423,35,697,168,2799,809,4853 18 | Karbi Anglong,956313,755,7,956313,7,173,37,1309,236,3673 19 | Baksa,950075,1283,12,950075,13,287,63,1777,375,4085 20 | Darrang,928500,2336,23,928500,24,512,119,2383,621,4483 21 | Kokrajhar,887142,264,2,887142,2,51,10,521,73,2477 22 | 
Udalguri,831668,914,9,831668,9,218,47,1431,289,3470 23 | Nalbari,771639,915,9,771639,9,217,47,1383,285,3268 24 | Bongaigaon,738804,639,6,738804,6,148,32,1078,200,2904 25 | Dhemaji,686133,1625,16,686133,17,359,83,1716,439,3281 26 | Hailakandi,659296,548,5,659296,5,123,26,922,168,2552 27 | Chirang,482162,470,4,482162,4,98,21,712,133,1904 28 | Dima Hasao,214102,387,3,214102,3,71,15,422,92,938 29 | -------------------------------------------------------------------------------- /nbs_govt_estimations/BusJourneys.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import random\n", 11 | "from pathlib import Path\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import pdfplumber\n", 15 | "\n", 16 | "%load_ext autoreload\n", 17 | "%autoreload 2\n", 18 | "Path.ls = lambda x: list(x.iterdir())\n", 19 | "# from urllib.parse import quote_plus" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "filepath = Path(\"Timetable_Karnataka.pdf\").resolve()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "scrolled": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "pagewise_df = []\n", 40 | "with pdfplumber.open(filepath) as pdf:\n", 41 | " for page in pdf.pages[:]:\n", 42 | " page_df = page.extract_tables(\n", 43 | " {\n", 44 | " # \"vertical_strategy\": \"lines\",\n", 45 | " # \"horizontal_strategy\": \"lines\",\n", 46 | " # \"keep_blank_chars\": True,\n", 47 | " # \"min_words_horizontal\": 6,\n", 48 | " # \"text_tolerance\":15\n", 49 | " }\n", 50 | " )\n", 51 | " pagewise_df.append(page_df)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": { 58 | "scrolled": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# pagewise_df = pagewise_df[0]\n", 63 | "t0 = [x[0] for x in pagewise_df]\n", 64 | "flatten = lambda l: [item for sublist in l for item in sublist]\n", 65 | "df = pd.DataFrame(flatten(t0)[1:], columns=flatten(t0)[0])" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "df.to_csv(\"KSRTCBusTimetable.csv\", index=False)" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.7.5" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /nbs_govt_estimations/KSRTCBusTimetable.csv: -------------------------------------------------------------------------------- 1 | FROM,TO,"NO. 
OF 2 | SERVICES",TIMING 3 | AGUMBE,BENGALURU,1,2001 4 | ALIKE,BENGALURU,1,2031 5 | ANAVATTI,BENGALURU,1,2000 6 | ANEKAL,DHARMASTALA,1,1830 7 | ATHANI,BENGALURU,2,"1415, 1800" 8 | ATHANI,DHARMASTALA,1,1230 9 | BAGALKOT,BIDAR,1,2000 10 | BAGALKOT,MANGALURU,2,"1645, 1800" 11 | BAGALKOT,MYSURU,1,"0600," 12 | BAILAHONGALA,DHARMASTALA,1,"0801," 13 | BALLARI,BELAGAVI,1,2205 14 | BALLARI,BENGALURU,6,"0501, 2100, 2159, 2227, 2245, 2302" 15 | BALLARI,BIDAR,2,"1900, 2000" 16 | BALLARI,DAVANAGERE,1,"0500," 17 | BALLARI,DHARMASTALA,1,2000 18 | BALLARI,KALABURAGI,3,"2130, 2214, 2230" 19 | BALLARI,MANGALURU,2,"1735, 1845" 20 | BALLARI,MYSURU,2,"2001, 2132" 21 | BANAHATTI,BENGALURU,1,1730 22 | BELAGAVI,BALLARI,2,"1900, 2200" 23 | BELAGAVI,BENGALURU,14,"0730, 0800, 1205, 1332, 1840, 1900, 1930, 24 | 2001, 2043, 2115, 2130, 2135, 2145, 2200" 25 | BELAGAVI,BIDAR,2,"1800, 2100" 26 | BELAGAVI,CHIKKAMAGALURU,1,"0830," 27 | BELAGAVI,HASSAN,1,1730 28 | BELAGAVI,KALABURAGI,3,"0830, 2130, 2205" 29 | BELAGAVI,MANGALURU,1,2000 30 | BELAGAVI,MYSURU,4,"1800, 2001, 2100, 2130" 31 | BELAGAVI,RAICHUR,1,2130 32 | BELAGAVI,SHAKTINAGAR DEVASUGURU,2,"0800, 0930" 33 | BELAGAVI,SIRSI,1,1200 34 | BELTHANGADI,BENGALURU,2,"2120, 2145" 35 | BENGALURU,AGUMBE,1,2203 36 | BENGALURU,ALIKE,1,2216 37 | BENGALURU,ALURSIDDAPUR,1,1416 38 | BENGALURU,ANAVATTI,1,2130 39 | BENGALURU,ATHANI,2,"1630, 1900" 40 | BENGALURU,BAGAMANDALA,1,2316 41 | BENGALURU,BALLARI,4,"1415, 2206, 2237, 2300" 42 | BENGALURU,BANAHATTI,1,1945 43 | BENGALURU,BASAVAKALYANA,1,1800 44 | BENGALURU,BELAGAVI,9,"0930, 1932, 1935, 2121, 2130, 2133, 2210, 45 | 2230, 2315" 46 | BENGALURU,BELTHANGADI,2,"2130, 2159" 47 | BENGALURU,BETAGERI,1,2001 48 | BENGALURU,BHATKAL,4,"0701, 1850, 1902, 1930" 49 | BENGALURU,BIDAR,5,"1715, 1808, 1820, 1852, 1936" 50 | BENGALURU,BILAGI,1,2100 51 | BENGALURU,CHICKODI,1,2055 52 | BENGALURU,CHIKKAMAGALURU,12,"0530, 0632, 0830, 1030, 1101, 1301, 1431, 53 | 1815, 2229, 2259, 2330, 2331" 54 | BENGALURU,DANDELI,2,"2002, 2044" 55 | BENGALURU,DAVANAGERE,24,"0500, 0600, 0630, 0655, 0732, 0800, 0835, 56 | 0900, 1200, 1230, 1300, 1400, 1402, 1446, 57 | 1600, 1650, 1720, 1745, 1830, 1915, 2220, 58 | 2230, 2300, 2347" 59 | FROM,TO,"NO. 
OF 60 | SERVICES",TIMING 61 | BENGALURU,DEVADURGA,2,"2015, 2030" 62 | BENGALURU,DHARMASTALA,21,"0646, 0815, 0905, 0930, 1002, 1214, 1347, 63 | 1917, 2059, 2116, 2134, 2144, 2145, 2217, 64 | 2219, 2229, 2234, 2249, 2251, 2303,2314" 65 | BENGALURU,DHARWAD,2,"2220, 2330" 66 | BENGALURU,DONIMALAI,2,"2230, 2307" 67 | BENGALURU,GADAG,1,2145 68 | BENGALURU,GANGAVATHI,4,"2025, 2201, 2245, 2331" 69 | BENGALURU,GOKAK,1,2030 70 | BENGALURU,GOKARNA,4,"2035, 2100, 2215, 2201" 71 | BENGALURU,HAMPI,1,2259 72 | BENGALURU,HANAGAL,1,2259 73 | BENGALURU,HARIHAR,4,"0531, 1330, 2330, 2355" 74 | BENGALURU,HATTI,2,"1815, 2232" 75 | BENGALURU,HAVERI,1,2345 76 | BENGALURU,HEGGODU SAGAR,1,2215 77 | BENGALURU,HIREKERUR,1,2235 78 | BENGALURU,HORANADU,9,"0733, 1114, 1215, 2000, 2045, 2133, 2202, 79 | 2226, 2246" 80 | BENGALURU,HOSADURGA,8,"0530, 0715, 0915, 1130, 1430, 1545, 1700, 81 | 1745," 82 | BENGALURU,HOSAPETE,1,2314 83 | BENGALURU,HUBBALLI,10,"1101, 1130, 1500, 2145, 2213, 2230, 2231, 84 | 2300, 2336, 2358" 85 | BENGALURU,HUMCHA,2,"2241, 2245" 86 | BENGALURU,HUVINA HADAGALI,1,2250 87 | BENGALURU,INDI,1,2216 88 | BENGALURU,KALABURAGI,6,"1600, 1645, 1801, 1845, 1915, 2100" 89 | BENGALURU,KAMPLI,1,2245 90 | BENGALURU,KARATAGI,2,"2131, 2340" 91 | BENGALURU,KARWAR,3,"1701, 1901, 2020" 92 | BENGALURU,KATIL,1,2200 93 | BENGALURU,KOLLUR,1,2100 94 | BENGALURU,KOPPAL,2,"2100, 2130" 95 | BENGALURU,KUKKESUBRAMANYA,8,"0817, 0834, 0900, 2017, 2117, 2200, 2314, 96 | 2316" 97 | BENGALURU,KUMTA,1,2011 98 | BENGALURU,KUNDAPURA,19,"0801, 0830, 0835, 1900, 1930, 2030, 2033, 99 | 2042, 2045, 2048, 2113, 2115, 2120, 2128, 100 | 2156, 2200, 2201, 2210, 2250" 101 | BENGALURU,KUPPALLI,1,2230 102 | BENGALURU,KUSTAGI,2,"1920, 2115" 103 | BENGALURU,M M HILLS,2,"0730, 2030" 104 | BENGALURU,MANGALURU,42,"0605, 0700, 0702, 0736, 0750, 0829, 0901, 105 | 0906, 0930, 1003, 1004, 1016, 1200, 1216, 106 | 1300, 1428, 1432, 1831, 2000, 2030, 2031, 107 | 2112, 2116, 2135, 2145, 2146, 2200, 2218, 108 | 2221, 2224, 2229, 2230, 2235, 2240, 2242, 109 | 2251, 2300, 2301, 2311, 2315, 2334, 2350" 110 | BENGALURU,MANIPAL,3,"0930, 2155, 2215" 111 | BENGALURU,MERCARA MADIKERI,18,"0800, 0832, 1006, 1205, 1331, 1515, 1830, 112 | 2105, 2201, 2215, 2228, 2236, 2300, 2331, 113 | 2333, 2345, 2357, 2359" 114 | BENGALURU,MURDESHWARA,1,2130 115 | BENGALURU,MYSURU,1,"0646," 116 | BENGALURU,PAVAGADA,1,2131 117 | BENGALURU,PUTTUR,5,"2140, 2152, 2159, 2236, 2302" 118 | FROM,TO,"NO. 
OF 119 | SERVICES",TIMING 120 | BENGALURU,RAICHUR,5,"2037, 2059, 2152, 2208, 2235" 121 | BENGALURU,RON,1,2122 122 | BENGALURU,SADALGA,1,1945 123 | BENGALURU,SAGARA,2,"2230, 2325" 124 | BENGALURU,SAKALESHPUR,1,"0831," 125 | BENGALURU,SAUNDATTI,1,2130 126 | BENGALURU,SHAHAPUR,1,2045 127 | BENGALURU,SHAKTINAGAR DEVASUGURU,1,1931 128 | BENGALURU,SHIVAMOGGA,33,"0600, 0630, 0730, 0.801, 0830,0901, 1007, 129 | 1031, 1129, 1201, 1244, 1331, 1346, 1400, 130 | 1416, 1431, 1446, 1501, 1514, 1601, 1730, 131 | 2131, 2159, 2200, 2230, 2302, 2304, 2310, 132 | 2316, 2319, 2331, 2347, 2358" 133 | BENGALURU,SIGANDUR,2,"0830, 2200" 134 | BENGALURU,SINDHANOOR,2,"2144, 2246" 135 | BENGALURU,SIRSI,8,"0631, 1205, 2059, 2131, 2150, 2206, 2229, 136 | 2247" 137 | BENGALURU,SIRUGUPPA,2,"2244, 2330" 138 | BENGALURU,SOMVARPET,5,"2015, 2100, 2145, 2159, 2259" 139 | BENGALURU,SRINGERI,11,"0801, 2044, 2114, 2129, 2131, 2143, 2201, 140 | 2216, 2228, 2233, 2331" 141 | BENGALURU,SULLYA,2,"2230, 2240" 142 | BENGALURU,TALIKOTE,2,"1940, 2005" 143 | BENGALURU,UDUPI,5,"1950, 2002, 2032, 2102, 2131" 144 | BENGALURU,VIDYANAGAR JINDAL,1,2246 145 | BENGALURU,VIJAYAPURA,3,"1600, 1947, 2030" 146 | BENGALURU,VIRAJAPETE,8,"0545, 0634, 0921, 1301, 1431, 2232, 2300, 147 | 2344" 148 | BENGALURU,YELBURGA,3,"2045, 2231, 2345" 149 | BHATKAL,BENGALURU,2,"1615, 1901" 150 | BIDAR,BALLARI,2,"2000, 2030" 151 | BIDAR,BELAGAVI,2,"1800, 2200" 152 | BIDAR,BENGALURU,5,"1536, 1632, 1701, 1801, 1832" 153 | BIDAR,DAVANAGERE,2,"1430, 1800" 154 | BIDAR,HUBBALLI,1,1845 155 | BIDAR,SHIVAMOGGA,1,1530 156 | BILAGI,BENGALURU,1,1913 157 | BILAGI,MANGALURU,1,1530 158 | BYNDOOR,BENGALURU,1,2001 159 | CHALLAKERE,DHARMASTALA,1,1430 160 | CHIKKAMAGALURU,BENGALURU,17,"0400, 0430, 0445, 0501, 1130, 1201, 1331, 161 | 1400, 1432, 1500, 1611, 2300, 2305, 2330, 162 | 2335, 2358, 2359" 163 | CHIKKAMAGALURU,KARWAR,1,"0600," 164 | CHIKKAMAGALURU,MANGALURU,1,"0800," 165 | CHIKKAMAGALURU,UDUPI,1,"0630," 166 | CHIKKAMAGALURU,VIJAYAPURA,1,1400 167 | CHIKKODI,BENGALURU,2,"1530, 1901" 168 | CHIKKODI,KALABURAGI,1,1040 169 | CHITRADURGA,BENGALURU,4,"0501, 0600, 0900, 1030" 170 | CHITRADURGA,DHARMASTALA,2,"0500, 2145" 171 | CHITRADURGA,MANGALURU,1,2100 172 | CHITRADURGA,MYSURU,2,"0945, 1330" 173 | DANDELI,BENGALURU,4,"0745, 1701, 1759, 1930" 174 | DANDELI,KALABURAGI,2,"0700, 2000" 175 | DANDELI,SHAKTINAGAR DEVASUGURU,2,"1515, 1730" 176 | DAVANAGERE,BALLARI,1,1030 177 | FROM,TO,"NO. 
OF 178 | SERVICES",TIMING 179 | DAVANAGERE,BENGALURU,26,"0516, 0602, 0715, 0740, 0805, 0845, 0910, 180 | 0945, 1000, 1029, 1130, 1230, 1300, 1401, 181 | 1500, 1605, 1700, 1745, 1830, 2245, 2315, 182 | 2330, 2343, 2344, 2359" 183 | DAVANAGERE,BIDAR,2,"1700, 1830" 184 | DAVANAGERE,KALABURAGI,2,"1902, 2001" 185 | DAVANAGERE,MANGALURU,1,2200 186 | DAVANAGERE,MYSURU,2,"1931, 2231" 187 | DAVANAGERE,RAICHUR,2,"2000, 2102" 188 | DEVADURGA,BENGALURU,2,"1732, 1900" 189 | DHARMASTALA,ANEKAL,1,2130 190 | DHARMASTALA,BALLARI,1,2030 191 | DHARMASTALA,BENGALURU,17,"0920, 0930, 1015, 1030, 1045, 1215, 1317, 192 | 1331, 1400, 2001, 2031, 2145, 2150, 2201, 193 | 2203, 2215, 2301" 194 | DHARMASTALA,CHITRADURGA,1,2005 195 | DHARMASTALA,HOSAPETE,5,"0800, 0915, 1045, 1135, 1645" 196 | DHARMASTALA,HUBBALLI,1,"0830," 197 | DHARMASTALA,MYSURU,8,"1000, 1030, 1100, 1201, 1214, 1330, 1400, 198 | 2230" 199 | DHARMASTALA,TUMAKURU,2,"1330, 2100" 200 | DHARWAD,BENGALURU,2,"2031, 2104" 201 | DHARWAD,KALABURAGI,2,"1832, 2000" 202 | DHARWAD,RAICHUR,1,2132 203 | DHARWAD,YADAGIRI,1,2018 204 | DONIMALAI,BENGALURU,2,"1930, 2200" 205 | DONIMALAI,KOLAR,1,2100 206 | DONIMALAI,MANIPAL,1,1816 207 | GADAG,BENGALURU,3,"1830, 1930, 2145" 208 | GADAG,MYSURU,1,"0630," 209 | GAJENDRAGADA,ARASIKERE,1,"0800," 210 | GAJENDRAGADA,BENGALURU,1,1930 211 | GAJENDRAGADA,MANGALURU,1,1700 212 | GANGAVATHI,BENGALURU,8,"1930, 2015, 2045, 2101, 2115, 2132, 2135, 213 | 2145" 214 | GANGAVATHI,MANGALURU,1,1730 215 | GOKAK,BENGALURU,1,1829 216 | GOKARNA,BENGALURU,4,"1700, 1905, 1910, 1959" 217 | GOKARNA,KUKKESUBRAMANYA,1,"0845," 218 | HAGARIBOMMANAHALLI,BENGALURU,1,2145 219 | HALIYAL,DHARMASTALA,1,"0645," 220 | HAMPI,BENGALURU,1,2000 221 | HANAGAL,BENGALURU,1,2030 222 | HARAPANAHALLI,BENGALURU,1,2145 223 | HARAPANAHALLI,MANGALURU,1,1945 224 | HARAPANAHALLI,MANIPAL,1,2045 225 | HARIHARA,BENGALURU,5,"1230, 1330, 1400, 1530, 2100" 226 | HASSAN,BELAGAVI,1,1900 227 | HASSAN,BENGALURU,27,"0430, 0502, 0530, 0600, 0631, 0700, 0800, 228 | 0900, 0930, 1000, 1030, 1100, 1130, 1201, 229 | 1230, 1301, 1330, 1405, 1430, 1502, 1530, 230 | 1602, 1630, 1700, 1730, 1830, 1900" 231 | HATTI,BENGALURU,1,2015 232 | HAVERI,DHARMASTALA,1,"0830," 233 | HAVERI,KALABURAGI,1,"0630," 234 | HEGGODU SAGAR,BENGALURU,1,2014 235 | HIREKERUR,BENGALURU,1,2031 236 | FROM,TO,"NO. 
OF 237 | SERVICES",TIMING 238 | HORANADU,BENGALURU,9,"0815, 0903, 1245, 1345, 1445, 2030, 2115, 239 | 2130, 2145" 240 | HORANADU,DODDABALLAPURA,1,"0730.," 241 | HORANADU,GUNDLUPET,1,"0700," 242 | HORANADU,KUKKESUBRAMANYA,1,"0945," 243 | HOSADURGA,BENGALURU,8,"0715, 0800, 0830, 0930, 1015, 1100, 1500, 244 | 2230" 245 | HOSANAADU,KOLAR GOLD FIELD,1,2100 246 | HOSAPETE,BENGALURU,3,"2030, 2131, 2230" 247 | HOSAPETE,DHARMASTALA,2,"1715, 1830" 248 | HOSAPETE,KALABURAGI,1,2246 249 | HOSAPETE,MYSURU,3,"1901, 2000, 2131" 250 | HUBBALLI,BENGALURU,17,"0629, 0730, 0815, 0829, 0900, 1000, 1129, 251 | 1300, 1500, 1915, 1931, 2031, 2101, 2131, 252 | 2200, 2201, 2230," 253 | HUBBALLI,BIDAR,1,1830 254 | HUBBALLI,CHINTAMANI,1,2000 255 | HUBBALLI,DHARMASTALA,3,"0631, 0650, 1800" 256 | HUBBALLI,KALABURAGI,2,"0630, 2231" 257 | HUBBALLI,KARWAR,2,"0600, 0640," 258 | HUBBALLI,KUKKESUBRAMANYA,1,1515 259 | HUBBALLI,MERCARA MADIKERI,1,1700 260 | HUBBALLI,MYSURU,1,"0715," 261 | HUBBALLI,PUTTUR,1,"0830," 262 | HUBBALLI,SHIVAMOGGA,1,"0730," 263 | HUBBALLI,TADADI,1,"0800," 264 | HUBBALLI,UDUPI,1,1315 265 | HUMCHA,BENGALURU,1,2000 266 | HUNAGUND,BENGALURU,1,"0730," 267 | ILKAL,BENGALURU,1,2015 268 | ILKAL,MANGALURU,1,1315 269 | KALABURAGI,BALLARI,3,"2131, 2214, 2230" 270 | KALABURAGI,BELAGAVI,2,"2100, 2202" 271 | KALABURAGI,BENGALURU,10,"1330, 1401, 1501, 1602, 1633, 1700, 1826, 272 | 1830, 1859, 1930" 273 | KALABURAGI,DANDELI,1,1500 274 | KALABURAGI,DAVANAGERE,4,"0715, 2004, 2100, 2130" 275 | KALABURAGI,DHARWAD,2,"1802, 2200" 276 | KALABURAGI,HOSAPETE,1,2246 277 | KALABURAGI,HUBBALLI,3,"0630, 1615, 2101" 278 | KALABURAGI,MANGALURU,1,1030 279 | KALABURAGI,MYSURU,1,1228 280 | KALABURAGI,RATHANAGIRI,1,1601 281 | KALABURAGI,SHIVAMOGGA,4,"1400, 1730, 1817, 2030" 282 | KARWAR,BALLARI,1,"0930," 283 | KARWAR,BENGALURU,3,"1515, 1900, 1945" 284 | KARWAR,CHIKKAMAGALURU,2,"0830, 0930" 285 | KARWAR,DHARMASTALA,1,"0730," 286 | KARWAR,HUBBALLI,3,"1045, 1115, 1230" 287 | KARWAR,MANIPAL,1,"0530," 288 | KATIL,BENGALURU,1,1745 289 | KEMPEGOWDA INTL AIRPORT,KUNDAPURA,1,2100 290 | KEMPEGOWDA INTL AIRPORT,MERCARA MADIKERI,2,"0100, 1130" 291 | KEMPEGOWDA INTL AIRPORT,MYSURU,18,"0201, 0300, 0401, 0530, 0800, 0915, 1030, 292 | 1230, 1346, 1500, 1603, 1700, 1800, 1915, 293 | 2015, 2115, 2230, 2355" 294 | KOLLUR,BENGALURU,1,1759 295 | FROM,TO,"NO. OF 296 | SERVICES",TIMING 297 | KOPPAL,BENGALURU,2,"2034, 2202" 298 | KUKKESUBRAMANYA,BELAGAVI,1,1515 299 | KUKKESUBRAMANYA,BENGALURU,24,"0931, 1001, 1040, 1216, 1300, 1315, 1344, 300 | 1400, 1416, 1431, 1451, 1614, 2030, 2105, 301 | 2117, 2130, 2140, 2200, 2210, 2215, 2222, 302 | 2226, 2234, 2247" 303 | KUKKESUBRAMANYA,HORANADU,1,1050 304 | KUKKESUBRAMANYA,MYSURU,3,"1245, 1401, 1431" 305 | KUMTA,BALLARI,2,"0600, 0800" 306 | KUMTA,BENGALURU,1,1801 307 | KUNDAPURA,BENGALURU,24,"0545. 
0615, 0645, 0700, 0720, 0800, 0815, 308 | 0900, 1030, 1800, 1830, 1900, 1901, 1916, 309 | 1923, 1936, 1952, 1956, 1959, 2000, 2033, 310 | 2045, 2046, 2105" 311 | KUNDAPURA,KEMPEGOWDA INTL AIRPORT,1,1900 312 | KUNDAPURA,MANDYA,1,2015 313 | KUNDAPURA,MYSURU,1,2031 314 | KUPPALLI,BENGALURU,1,2100 315 | KUSTAGI,BENGALURU,1,2100 316 | LINGASUGUR,BENGALURU,2,"1951, 1959" 317 | M M HILLS,BENGALURU,2,"0830, 1430" 318 | MANDYA,KUKKESUBRAMANYA,1,2015 319 | MANDYA,KUNDAPURA,1,1915 320 | MANDYA,MANIPAL,1,2131 321 | MANGALURU,BAGALKOT,1,1930 322 | MANGALURU,BALLARI,3,"1745, 1800, 1900" 323 | MANGALURU,BELAGAVI,2,"1214, 2130" 324 | MANGALURU,BENGALURU,46,"0531, 0700, 0702, 0745, 0800, 0830, 0900, 325 | 1000, 1030, 1100, 1201, 1205, 1300, 1345, 326 | 1400, 1431, 1503, 1600, 1630, 1900, 1935, 327 | 1959, 2000, 2030, 2040, 2050, 2059, 2100, 328 | 2101, 2109, 2114, 2115, 2125, 2130, 2136, 329 | 2140, 2144, 2145, 2159, 2200, 2212, 2221, 330 | 2230, 2239, 2244, 2250" 331 | MANGALURU,BILAGI,1,2030 332 | MANGALURU,CHIKKAMAGALURU,1,"0530," 333 | MANGALURU,DAVANAGERE,2,"0645, 0730" 334 | MANGALURU,GAJENDRAGADA,1,2130 335 | MANGALURU,HASSAN,1,1600 336 | MANGALURU,HUVINA HADAGALI,1,2030 337 | MANGALURU,ILKAL,1,1801 338 | MANGALURU,KALABURAGI,1,1430 339 | MANGALURU,KOPPAL,1,2100 340 | MANGALURU,KUSTAGI,1,1850 341 | MANGALURU,MUDDEBIHAL,1,1945 342 | MANGALURU,MYSURU,5,"1300, 1531, 1600, 2030, 2159" 343 | MANGALURU,RAICHUR,1,1500 344 | MANGALURU,VIJAYAPURA,1,1515 345 | MANIPAL,BENGALURU,3,"0905, 2021, 2115" 346 | MANIPAL,DAVANAGERE,1,2145 347 | MANIPAL,MANDYA,1,1916 348 | MERCARA MADIKERI,BENGALURU,18,"0559, 0701, 0730, 0802, 1000, 1100, 1300, 349 | 1400, 1401, 1700, 2001, 2101, 2102, 2131, 350 | 2200, 2232, 2300, 2316" 351 | MERCARA MADIKERI,HUBBALLI,1,1930 352 | MERCARA MADIKERI,KEMPEGOWDA INTL AIRPORT,2,"0830, 2030" 353 | MURDESHWARA,BENGALURU,1,2001 354 | FROM,TO,"NO. 
OF 355 | SERVICES",TIMING 356 | MURDESHWARA,MYSURU,1,1829 357 | MYSURU,BALLARI,3,"1135, 2100, 2202" 358 | MYSURU,BELAGAVI,3,"2030, 2100, 2200" 359 | MYSURU,BENGALURU,1,"0831," 360 | MYSURU,CHIKKAMAGALURU,5,"0615, 0700, 0730, 0800, 0930" 361 | MYSURU,CHIKKODI,1,1630 362 | MYSURU,DAVANAGERE,4,"0930, 2215, 2301, 2330" 363 | MYSURU,DHARMASTALA,9,"0700, 1000, 1100, 2015, 2045, 2129, 2230, 364 | 2244, 2300" 365 | MYSURU,GADAG,2,"0900, 1731" 366 | MYSURU,GANGAVATHI,1,2232 367 | MYSURU,GOKARNA,1,"0605," 368 | MYSURU,HORANADU,1,"0915," 369 | MYSURU,HOSAPETE,3,"1931, 2000, 2200" 370 | MYSURU,HUBBALLI,4,"0800, 1030, 1200, 1845" 371 | MYSURU,KALABURAGI,1,1330 372 | MYSURU,KEMPEGOWDA INTL AIRPORT,19,"0030, 0115, 0300, 0430, 0600, 0700, 0825, 373 | 1001, 1130, 1300, 1416, 1545, 1700, 1830, 374 | 2002, 2101, 2201, 2300, 2345" 375 | MYSURU,KOLLUR,1,2200 376 | MYSURU,KUNDAPURA,1,2244 377 | MYSURU,M M HILLS,3,"0700, 0730, 0900" 378 | MYSURU,MANGALURU,6,"1700, 1900, 2214, 2240, 2301, 2335" 379 | MYSURU,MANIPAL,2,"2215, 2320" 380 | MYSURU,MURDESHWARA,1,2100 381 | MYSURU,RAICHUR,2,"1400, 1815" 382 | MYSURU,SAGARA,1,2130 383 | MYSURU,SHIVAMOGGA,3,"1500, 1650, 2330" 384 | MYSURU,SINDHANOOR,1,2115 385 | MYSURU,SIRSI,1,2115 386 | MYSURU,SRINGERI,1,2100 387 | MYSURU,TUMAKURU,1,1730 388 | MYSURU,UDUPI,1,2209 389 | MYSURU,VIJAYAPURA,2,"1300, 1545" 390 | MYSURU,YELLAPUR,1,2230 391 | NIPPANI,KALABURAGI,2,"0845, 0915" 392 | PUTTUR,BENGALURU,7,"0800, 1000, 1945, 2000, 2030, 2116, 2150" 393 | PUTTUR,HUBBALLI,1,"0745," 394 | RAICHUR,BELAGAVI,1,"2000," 395 | RAICHUR,BENGALURU,5,"1702, 2004, 2045, 2104, 2136" 396 | RAICHUR,DAVANAGERE,2,"2000, 2102" 397 | RAICHUR,DHARWAD,1,2116 398 | RAICHUR,HUBBALLI,1,2206 399 | RAICHUR,MANGALURU,1,1200 400 | RAICHUR,MYSURU,1,1216 401 | RANEBENNUR,DHARMASTALA,1,1645 402 | RANEBENNUR,YADAGIRI,1,"0930," 403 | RATHANAGIRI,KALABURAGI,1,"2045," 404 | RATHANAGIRI,VIJAYAPURA,3,"0500, 0730, 2000," 405 | RAYBAG,BALLARI,1,"0930," 406 | RAYBAG,SHIVAMOGGA,1,"0700," 407 | RON,BENGALURU,1,2015 408 | RON,MANGALURU,1,1620 409 | SADALGA,BENGALURU,1,"1900," 410 | SAGARA,BENGALURU,3,"2105, 2134, 2201" 411 | SAGARA,SHAKTINAGAR DEVASUGURU,1,1730 412 | SAUNDATTI,BENGALURU,1,1930 413 | FROM,TO,"NO. 
OF 414 | SERVICES",TIMING 415 | SAUNDATTI,MANGALURU,1,"0745," 416 | SHAKTINAGAR DEVASUGURU,DANDELI,1,"1900," 417 | SHAKTINAGAR DEVASUGURU,SAGARA,1,1800 418 | SHIVAMOGGA,BENGALURU,22,"0530, 0931, 1034, 1101, 1200, 1301, 1400, 419 | 1435, 1503, 1531, 1601, 1702, 2100, 2200, 420 | 2214, 2229, 2230, 2245, 2300, 2345, 2326, 421 | 2345" 422 | SHIVAMOGGA,BIDAR,1,1600 423 | SHIVAMOGGA,DHARMASTALA,1,"0930," 424 | SHIVAMOGGA,DHARWAD,1,"0545," 425 | SHIVAMOGGA,HUBBALLI,1,1300 426 | SHIVAMOGGA,KALABURAGI,2,"1900, 1932" 427 | SHIVAMOGGA,MYSURU,2,"0645, 2330" 428 | SIGANDUR,BENGALURU,2,"0645, 1900" 429 | SINDHANOOR,BENGALURU,2,"2045, 2130" 430 | SINDHANOOR,MYSURU,1,1948 431 | SIRAGUPPA,BENGALURU,1,2103 432 | SIRAGUPPA,DHARMASTALA,1,1630 433 | SIRAGUPPA,VIJAYAPURA,1,1845 434 | SIRSI,BELAGAVI,1,"0630," 435 | SIRSI,BENGALURU,9,"0630, 0700, 1145, 1900, 1930, 2015, 2100, 436 | 2118, 2137" 437 | SIRSI,KARWAR,1,"0700," 438 | SIRSI,MANIPAL,1,"0715," 439 | SIRSI,MYSURU,1,2030 440 | SOMAVARPET,BENGALURU,1,1431 441 | SRINGERI,ANEKAL,1,1845 442 | SRINGERI,BENGALURU,11,"0830, 1130, 1400, 1840, 1945, 2015, 2030, 443 | 2045, 2100, 2111, 2206" 444 | SRINGERI,KOLAR,2,"1831, 1901" 445 | SRINGERI,MYSURU,2,"0815, 2030" 446 | SULLYA,BENGALURU,3,"2030, 2100, 2216" 447 | TALIKOTE,BENGALURU,3,"1600, 1716, 1830," 448 | TALIKOTE,MANGALURU,1,1300 449 | TERADAL,BENGALURU,1,1445 450 | TIRTHAHALLI,BENGALURU,4,"2102, 2130, 214, 2215" 451 | TUMAKURU,DHARMASTALA,2,"2100, 2130" 452 | TUMAKURU,MYSURU,4,"0615, 0900, 1030, 1900," 453 | UDUPI,BAGALKOT,1,"0630," 454 | UDUPI,BENGALURU,6,"1630, 1700, 1800, 1900, 1941, 2030" 455 | UDUPI,MYSURU,3,"2000, 2014, 2215" 456 | VIDYANAGAR JINDAL,BENGALURU,1,2201 457 | VIJAYAPURA,BENGALURU,5,"1800, 1907, 2006, 2028, 2130" 458 | VIJAYAPURA,CHIKKAMAGALURU,1,1730 459 | VIJAYAPURA,MANGALURU,1,1730 460 | VIJAYAPURA,MYSURU,2,"1450, 1700" 461 | VIJAYAPURA,RATHANAGIRI,6,"0915,1030, 1930, 2031, 2100, 2131" 462 | VIJAYAPURA,SHAKTINAGAR DEVASUGURU,1,1330 463 | VIJAYAPURA,SIRUGUPPA,1,2230 464 | VIRAJPETE,BENGALURU,6,"0536, 0900, 1117, 1331, 1606, 2300" 465 | YADAGIR,BENGALURU,1,1900 466 | YADAGIR,DAVANAGERE,1,"0845," 467 | YADAGIR,DHARWAD,1,2102 468 | YALABURGA,BENGALURU,5,"1801, 1902, 1903, 2000, 2030" 469 | YELLAPUR,MANGALURU,3,"0930, 1130, 1430" 470 | YELLAPUR,MYSURU,2,"0900, 1730," 471 | -------------------------------------------------------------------------------- /nbs_healthcare/HealthcareCapacity_StatewiseBedCounts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "url = \"https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Requirement already satisfied: lxml in /Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages (4.5.0)\r\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "import json\n", 27 | "import random\n", 28 | "from pathlib import Path\n", 29 | "\n", 30 | "# !pip install camelot\n", 31 | "import camelot\n", 32 | "\n", 33 | "!pip install lxml\n", 34 | "import pandas as pd\n", 35 | "\n", 36 | "%load_ext autoreload\n", 37 | "%autoreload 2\n", 38 | "Path.ls = lambda x: list(x.iterdir())" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "df 
= pd.read_html(url, header=None)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/html": [ 58 | "
\n", 59 | "\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
012345
0States/UTsRural hospitalsRural hospitalsUrban hospitalsUrban hospitalsAs on
1NaNNo.BedsNo.BedsAs on
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " 0 1 2 3 \\\n", 109 | "0 States/UTs Rural hospitals Rural hospitals Urban hospitals \n", 110 | "1 NaN No. Beds No. \n", 111 | "\n", 112 | " 4 5 \n", 113 | "0 Urban hospitals As on \n", 114 | "1 Beds As on " 115 | ] 116 | }, 117 | "execution_count": 4, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "govt_hospitals = df[1]\n", 124 | "govt_hospitals[:2]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "headers = [\"State\", \"RuralHospitalsCount\", \"RuralBeds\", \"UrbanHospitals\", \"UrbanBeds\", \"Date\"]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "['State',\n", 145 | " 'RuralHospitalsCount',\n", 146 | " 'RuralBeds',\n", 147 | " 'UrbanHospitals',\n", 148 | " 'UrbanBeds',\n", 149 | " 'Date']" 150 | ] 151 | }, 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "headers" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 7, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "govt_hospitals.columns = headers" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "gvt_hosp_df = govt_hospitals[2:]" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 9, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "gvt_hosp_df.reset_index(drop=True, inplace=True)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 10, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from dateutil.parser import parse\n", 195 | "\n", 196 | "def date_parser(date: str):\n", 197 | " \"\"\"\n", 198 | " Argument:\n", 199 | " date(str): Input string\n", 200 | " \n", 201 | " Returns:\n", 202 | " dateutil object\n", 203 | " \"\"\"\n", 204 | " try:\n", 205 | " date = str(date)\n", 206 | " return parse(date)\n", 207 | " except:\n", 208 | " return None\n", 209 | " \n", 210 | "# gvt_hosp_df[\"Date\"] = gvt_hosp_df[\"Date\"].apply(parse)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 11, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stderr", 220 | "output_type": "stream", 221 | "text": [ 222 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/pandas/core/indexing.py:670: SettingWithCopyWarning: \n", 223 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 224 | "\n", 225 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 226 | " self._setitem_with_indexer(indexer, value)\n", 227 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 228 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 229 | "\n", 230 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 231 | " \"\"\"Entry point for launching an IPython kernel.\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "gvt_hosp_df.loc[36, \"Date\"] = \"01.01.2016\"" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | 
"execution_count": 12, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 249 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 250 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 251 | "\n", 252 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 253 | " \"\"\"Entry point for launching an IPython kernel.\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "gvt_hosp_df[\"Date\"] = gvt_hosp_df[\"Date\"].apply(parse)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 13, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stderr", 268 | "output_type": "stream", 269 | "text": [ 270 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 271 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 272 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 273 | "\n", 274 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 275 | " \"\"\"Entry point for launching an IPython kernel.\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "gvt_hosp_df[\"Source\"] = \"https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\"" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 14, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stderr", 290 | "output_type": "stream", 291 | "text": [ 292 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 293 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 294 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 295 | "\n", 296 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 297 | " \"\"\"Entry point for launching an IPython kernel.\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "gvt_hosp_df[\"State\"] = gvt_hosp_df[\"State\"].apply(lambda x: x.replace(\"*\", \"\"))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 29, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "gvt_hosp_df.to_csv(\"Hospitals.csv\", index=False)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 31, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stdout", 321 | "output_type": "stream", 322 | "text": [ 323 | "State,RuralHospitalsCount,RuralBeds,UrbanHospitals,UrbanBeds,Date,Source\r\n", 324 | "Andhra Pradesh,193,6480,65,16658,2017-01-01 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 325 | "Arunachal Pradesh,208,2136,10,268,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 326 | "Assam,1176,10944,50,6198,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 327 | "Bihar,930,6083,103,5936,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 328 | "Chhattisgarh,169,5070,45,4342,2016-01-01 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 329 | "Goa,17,1405,25,1608,2017-12-31 
00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 330 | "Gujarat,364,11715,122,20565,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 331 | "Haryana,609,6690,59,4550,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 332 | "Himachal Pradesh,705,5665,96,6734,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "!head Hospitals.csv" 338 | ] 339 | } 340 | ], 341 | "metadata": { 342 | "kernelspec": { 343 | "display_name": "Python 3", 344 | "language": "python", 345 | "name": "python3" 346 | }, 347 | "language_info": { 348 | "codemirror_mode": { 349 | "name": "ipython", 350 | "version": 3 351 | }, 352 | "file_extension": ".py", 353 | "mimetype": "text/x-python", 354 | "name": "python", 355 | "nbconvert_exporter": "python", 356 | "pygments_lexer": "ipython3", 357 | "version": "3.7.5" 358 | } 359 | }, 360 | "nbformat": 4, 361 | "nbformat_minor": 2 362 | } 363 | -------------------------------------------------------------------------------- /nbs_healthcare/NHP 2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/coronaIndia/73099b2b6fca51caf7528e96ddad6baed8fe074f/nbs_healthcare/NHP 2018.pdf -------------------------------------------------------------------------------- /nbs_scratchpad/2020-04-06_Plotting_Test_Curves.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install altair_viewer" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Covid in India: Possible Scenario\n", 17 | "Projected actual and detected cases on a log scale. Testing capacity starts at 1000 tests, rapidly increased over a period of time before maxing out at 100,000 tests. Based on the ICMR testing protocol, most tests are reserved for those with symptoms but some (e.g. private labs) are available on demand. 
" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import altair as alt\n", 27 | "import pandas as pd\n", 28 | "from dateutil.parser import parse" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "detected = [\n", 38 | " 0,\n", 39 | " 0,\n", 40 | " 0,\n", 41 | " 5,\n", 42 | " 11,\n", 43 | " 19,\n", 44 | " 31,\n", 45 | " 48,\n", 46 | " 76,\n", 47 | " 137,\n", 48 | " 296,\n", 49 | " 764,\n", 50 | " 2153,\n", 51 | " 4963,\n", 52 | " 10298,\n", 53 | " 19660,\n", 54 | " 34819,\n", 55 | " 57705,\n", 56 | " 90443,\n", 57 | " 135468,\n", 58 | " 195589,\n", 59 | " 265205,\n", 60 | " 333128,\n", 61 | " 397183,\n", 62 | " 454692,\n", 63 | " 502757,\n", 64 | " 539265,\n", 65 | " 564176,\n", 66 | " 579624,\n", 67 | " 588601,\n", 68 | " 593674,\n", 69 | " 596572,\n", 70 | " 598308,\n", 71 | " 599440,\n", 72 | " 600261,\n", 73 | " 600924,\n", 74 | " 601507,\n", 75 | "]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "actual = [\n", 85 | " 3,\n", 86 | " 13,\n", 87 | " 45,\n", 88 | " 147,\n", 89 | " 473,\n", 90 | " 1516,\n", 91 | " 4853,\n", 92 | " 15530,\n", 93 | " 44347,\n", 94 | " 122071,\n", 95 | " 331322,\n", 96 | " 705400,\n", 97 | " 1369398,\n", 98 | " 2533207,\n", 99 | " 4528267,\n", 100 | " 7820144,\n", 101 | " 12917856,\n", 102 | " 20064142,\n", 103 | " 28748371,\n", 104 | " 37555999,\n", 105 | " 44908712,\n", 106 | " 50063228,\n", 107 | " 53232297,\n", 108 | " 55021583,\n", 109 | " 55982701,\n", 110 | " 56485037,\n", 111 | " 56743817,\n", 112 | " 56876132,\n", 113 | " 56943526,\n", 114 | " 56977785,\n", 115 | " 56995183,\n", 116 | " 57004014,\n", 117 | " 57008495,\n", 118 | " 57010769,\n", 119 | " 57011923,\n", 120 | " 57012508,\n", 121 | " 57012805,\n", 122 | "]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "True" 134 | ] 135 | }, 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "len(actual) == len(detected)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "dates = [\n", 152 | " \"Feb. 1\",\n", 153 | " \"Feb. 6\",\n", 154 | " \"Feb. 11\",\n", 155 | " \"Feb. 16\",\n", 156 | " \"Feb. 21\",\n", 157 | " \"Feb. 26\",\n", 158 | " \"Mar. 2\",\n", 159 | " \"Mar. 7\",\n", 160 | " \"Mar. 12\",\n", 161 | " \"Mar. 17\",\n", 162 | " \"Mar. 22\",\n", 163 | " \"Mar. 27\",\n", 164 | " \"Apr. 1\",\n", 165 | " \"Apr. 6\",\n", 166 | " \"Apr. 11\",\n", 167 | " \"Apr. 16\",\n", 168 | " \"Apr. 21\",\n", 169 | " \"Apr. 26\",\n", 170 | " \"May. 1\",\n", 171 | " \"May. 6\",\n", 172 | " \"May. 11\",\n", 173 | " \"May. 16\",\n", 174 | " \"May. 21\",\n", 175 | " \"May. 26\",\n", 176 | " \"May. 31\",\n", 177 | " \"Jun. 5\",\n", 178 | " \"Jun. 10\",\n", 179 | " \"Jun. 15\",\n", 180 | " \"Jun. 20\",\n", 181 | " \"Jun. 25\",\n", 182 | " \"Jun. 30\",\n", 183 | " \"Jul. 5\",\n", 184 | " \"Jul. 10\",\n", 185 | " \"Jul. 15\",\n", 186 | " \"Jul. 20\",\n", 187 | " \"Jul. 25\",\n", 188 | " \"Jul. 
30\",\n", 189 | "]" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 7, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "dates = [parse(x) for x in dates]" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 8, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "assert(len(dates) == len(detected) == len(actual))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 9, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "df = pd.DataFrame({\"Detected\": detected, \"Date\": dates, \"Actual\": actual})\n", 217 | "df.set_index('Date', inplace=True)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 10, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "" 229 | ] 230 | }, 231 | "execution_count": 10, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | }, 235 | { 236 | "data": { 237 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEVCAYAAAAb/KWvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd3wVZfb48c9JpfciEEIHKVIDrAqsKCgqCHZAURBFVMTV1ZXfWsDVr+KuuhZckVUERYMKSlEUy4qogJJQpITeEkASCISakOSe3x9zgYiUhNybueW8X6+87p1nZu6cScLhyZlnnhFVxRhjTGiJcDsAY4wxvmfJ3RhjQpAld2OMCUGW3I0xJgRZcjfGmBBkyd0YY0KQJXdjjAlBltyNMSYERfn6A0UkHngVyATWqepYXx/DGGPMmRUquYvIRKA3kK6qrQq09wJeASKBt7yJ/AJgmqpOEZEPC/P51apV0/r16xc1dmOMCWvJycm7VbX6qdZJYaYfEJFuwEHg3WPJXUQigXVATyANWAwMAHYB0wAF3lPVd872+QkJCZqUlFS4szHGGAOAiCSrasKp1hWq5q6q83HKLAV1Ajao6iZVPQpMBfoCQ4DRqnopcPW5h22MMeZcFeeCah0gtcBymrftS2CkiIwHtpxuZxEZJiJJIpKUkZFRjDCMMcaczOcXVFV1JXBDIbabICI7gT4xMTEdfB2HMcaEs+Ik9+1A3QLLcd62QlPV2cDshISEu05el5ubS1paGtnZ2cUIMbyVKlWKuLg4oqOj3Q7FGFPCipPcFwNNRKQBTlLvDwwsygeISB+gT+PGjf+wLi0tjfLly1O/fn1EpBhhhidVZc+ePaSlpdGgQQO3wzHGlLBC1dxFJBFYCDQTkTQRGaqqecAIYC6QAnykqqt8FVh2djZVq1a1xH6ORISqVavaXz7GhKlC9dxVdcBp2ucAc8714GcqywCW2IvJvn/GZ1RBPSdeOdWy/v712H4FX09up8BQ7FO1nSmewgVeyO1Cj88vqBbFmcoygSAyMpILLriA3NxcoqKiuO2223jwwQeJiDj9HzxbtmxhwYIFDBxYpArVcZMmTeLyyy+ndu3ahd5ny5Yt9O7dm5UrV57TMU2AU4Wc/XB4DxzOdL6OHoDcI3D0MOQe8r4/BLmHvW2HIS8H8o+CJ895zc91vjy5v1/WfPDke5O1x/s+/8Sretz+Dphz4GpyP1vP3W2lS5dm2bJlAKSnpzNw4ED279/PU089ddp9tmzZwgcffFCs5N6qVasiJXcTxLL3w97NkLkJMjdDVmqBJL7nxJcn7+yfFV0GoktDdFmIKQNRsRAZAxHREFUKYitAZLTzFRHtXRcJEVHOq0SARJ54HxHpLB97j3i3wXk9viwn3kOB5WN/OcqJtuPrC/jd8mm2OaVC/mUayn/BPjXktKus515INWrUYMKECXTs2JExY8bg8XgYNWoU8+bNIycnh/vuu4+7776bUaNGkZKSQtu2bbn99tsZOXLkKbcDeP7555kyZQoRERFceeWVJCQkkJSUxC233ELp0qVZuHAhq1ev5qGHHuLgwYNUq1aNSZMmUatWLZKTk7njjjsAuPzyy9381pizyc+D9FWwa7U3kXuT+d7NTuIuqHQVKFsdylSFKg0hLsF5X/CrdBWILedN5mW8ibw0nOEvShOqAjS5F7bn/tTsVazesd+nx25RuwKj+7Qs0j4NGzYkPz+f9PR0Zs6cScWKFVm8eDE5OTlcfPHFXH755YwdO5YXXniBzz77DIAJEyaccrs1a9Ywc+ZMfv75Z8qUKUNmZiZVqlRh3LhxvPDCCyQkJJCbm8v999/PzJkzqV69Oh9++CGPPfYYEydOZMiQIYwbN45u3brxyCOP+PR7Y4rpyD5IS4LURZD6M6QlO6UTcHq3FeKgSn1o3gcqN4AqDU68xpZ3NXQTOlxN7sHsq6++4tdff2XatGkAZGVlsX79emJiYgq13TfffMOQIUMoU6YMAFWqVPnDMdauXcvKlSvp2bMnAPn5+dSqVYt9+/axb98+unXrBsCgQYP44osv/Hau5iwyN8PWBU4iT/0FMlKcdomE81pBu1ugbmc4rzVUrueUS4zxs6AoyxS1h+0vmzZtIjIykho1aqCqvPbaa1xxxRW/22bevHm/Wz7ddnPnzj3r8VSVli1bsnDhwt+179u379xOwPhO5mZY9Sms+gR+W+G0laroJPELrndea7d3yifGuMDVIp2qzlbVYRUrVnQzjELJyMhg+PDhjBgxAhHhiiuu4I033iA3NxeAdevWcejQIcqXL8+BAweO73e67Xr27Mk777zD4cOHAcjMdOZlK7h/s2bNyMjIOJ7cc3NzWbVqFZUqVaJSpUr8+OOPALz//vsl800Id/tS4adXYcIl8Gpb+PYpiIyFK56Fe3+Gv22BWz6Gbo9Ag26W2I2rrCxzBkeOHKFt27bHh0IOGjSIhx56CIA777yTLVu20L59e1SV6tWrM2PGDFq3bk1kZCRt2rRh8ODBPPDA
A6fcrlevXixbtoyEhARiYmK46qqrePbZZxk8eDDDhw8/fkF12rRpjBw5kqysLPLy8vjLX/5Cy5Yteeedd7jjjjsQEbug6k/7d8CqGU4vPe0Xp61WW+j5D2jRzymzGBOACjWfu98OfqIsc9f69et/ty4lJYXmzZu7E1gIse/jOUr9BX56BdZ8DijUvABa9oOW10LVRm5HZwxw5vncg2K0jDElwuOBtXNgwavOxdFSlaDLg9BmAFRv6nZ0xhSJlWWMyc2G5YmwcBzs2QCV4qHX89DuVqubm6Blyd2Er8OZsPht+OVNOJTh1NJvmAjN+0Kk/dMwwc1+g034ObIPfngRFr/lzMHSuCdcPBLqdw3tW9VNWAmKce7G+ER+LiRNhHlj4cheaH0zXPwA1GzhdmTG+JxdUDWhT9W5UPr1k05NvcGf4fJnoFZrtyMzxm9spqGzmDFjBiLCmjVrzrjdyy+/fPyGpHMxadIkRowYcc77m9PYsRQm94GpA515XQZ+BLfNtMRuQp4l97NITEykS5cuJCYmnnG74iZ342NZafDJ3c7dpOkpcPWLcM8CaHqF1dVNWPB5cheRriIyXkTeEpEFvv78knTw4EF+/PFH3n77baZOnQo4k3c9/PDDtGrVitatW/Paa6/x6quvsmPHDrp370737t0BKFfuxBC6adOmMXjwYABmz55N586dadeuHT169GDXrl0lfl4hLTcbvnsWXuvg3FXa5UEYuQQ63unMY25MmChUzV1EJgK9gXRVbVWgvRfwChAJvKWqY1X1B+AHEemH8xDt4vti1InJmXzlvAvgyrFn3GTmzJn06tWLpk2bUrVqVZKTk/nll1/YsmULy5YtIyoq6vhUvS+99BLfffcd1apVO+NndunShUWLFiEivPXWW/zzn//kxRdf9OWZha/NP8DsByBzI7S6AXqMdsasGxOGCntBdRIwDnj3WIOIRAKvAz2BNGCxiMxS1dXeTQYCQ30XaslLTEzkgQceAKB///4kJiayefNmhg8fTlSU86071VS9Z5KWlsbNN9/Mzp07OXr0KA0aNPB53GHnyF746glY+h5Urg+DZkCj7m5HZYyrCvuA7PkiUv+k5k7ABlXdBCAiU4G+wGoRiQeyVPUAvnCWHrY/ZGZm8r///Y8VK1YgIuTn5yMidOzYsVD7F3w4dXZ29vH3999/Pw899BDXXHMN8+bNY8yYMb4OPXyoOqWXLx51nmh08QPw51HOk4mMCXPFqbnXAVILLKd528Dpsb9zpp1FZJiIJIlIUkZGRjHC8I9p06YxaNAgtm7dypYtW0hNTaVBgwa0adOGN998k7w855mWp5qqF6BmzZqkpKTg8Xj49NNPj7dnZWVRp47zbZo8eXIJnlGIyUqDxP4wbQhUqA3DvnNmarTEbgzgp9EyqjpaVc94MVVVJwBPAUtOfnpRIEhMTOTaa6/9Xdv111/Pzp07iY+Pp3Xr1rRp04YPPvgAgGHDhtGrV6/jF1THjh1L7969ueiii6hVq9bxzxgzZgw33ngjHTp0OGt93pyCJx9+fhNe7wyb58Pl/wd3fgu12rgdmTEBpdBT/nrLMp8du6AqIhcCY1T1Cu/y/wNQ1eeKGkRCQoImJSX9rs2mqvWNkPo+ZqyDGffA9iRodBn0fsmpsRsTpvw15e9ioImINAC2A/1xLqIWJTCbfsCcnccDv0yAb0ZDdGm47r9wwY02Xt2YMyjsUMhE4BKgmoikAaNV9W0RGQHMxRkKOVFVV/ktUhOesrbDzHth0zxocjlc8xqUP8/tqIwJeIUdLTPgNO1zgDnnenCbW8ac0Ypp8PlDzoRfvf8NHYZYb92YQgroWSFV9XdDCk3RuPkIxWI5nAmf/xVWfQJxHeHaN+3RdsYUkatzy6jqbFUdVrFixT+sK1WqFHv27AneBOUyVWXPnj2UKlXK7VCKZsM38MZFkDILLn0chnxpid2YcxCwPfe4uDjS0tIIxDHwwaJUqVLExcW5HUbhHD3sTMm7+L9QrRkMmAq127odlTFBq9BDIf3pVEMhTRjZtdq5GSljDfzpXrjsSWdUjDHmjPw1FNKY4lGF5Enw5SiILQ+DPoVGl7odlTEhIWDLMibEZWc5Mziu+hQadncumpav6XZUxoSMgL2gakJYWjKM7wqrZ8Flo+HWTyyxG+NjVpYxJcfjgUWvwzdjoHwtGPIFxHd2OypjQpKVZUzJOLQbPh0OG76G83tD33FQurLbURkTsqwsY/xv8w8wvoszi+NVL8DNUyyxG+NnVpYx/qMKP70C3z4FVRrCwI+gVmu3ozImLFhyN/6Rvd+Z8CtlNrTo55RhYsu7HZUxYcOSu/G99BT48FbI3Ow8TOPC+2zCL2NKmF1QNb61YhrMuh9iysHts6H+xW5HZExYsguqxjfyc+HL/wfTh8J5F8Dd8y2xG+MiK8uY4jvwG3w8GLYthM7DoefTEBV4z8U1JpxYcjfFs3WBk9hzDsD1b8MFN7gdkTEGPyR3EYkAngYqAEmqOtnXxzABImkizHkEKtWDQTOgZgu3IzLGeBWq5i4iE0UkXURWntTeS0TWisgGERnlbe4LxAG5QJpvwzUBwZPv1Nc/exAaXgLDvrPEbkyAKewF1UlAr4INIhIJvA5cCbQABohIC6AZsEBVHwLu8V2oJiBk74fEAbDoP059fcCHUMouiBsTaAr7gOz5IlL/pOZOwAZV3QQgIlNxeu2pwFHvNvm+CdMEhL1bIbE/ZKyFq1+CjkPdjsgYcxrFqbnXwUnkx6QBnYFXgNdEpCsw/3Q7i8gwYBhAfHx8McIwJWLbzzB1oDPk8dbp0Ki72xEZY87A5xdUVfUwcNYunapOACaA85g9X8dhfOjXj2DmfVChjjM/TPWmbkdkjDmL4tzEtB2oW2A5zttWaCLSR0QmZGVlFSMM4zceD/zvGfjkLojrBHf9zxK7MUGiOD33xUATEWmAk9T7AwN9EpVxX+4RZ/711TOg3a1w9b/txiRjgkhhh0ImAguBZiKSJiJDVTUPGAHMBVKAj1R1VVEObtMPBKicAzDlelg907nb9JpxltiNCTKFHS0z4DTtc4A553pwmzgsAB3ZC1NugB1L4fq37I5T44p8j5Kb7yHPo+Tle8jNV/I8HvLyT7Tne788qniU4+/zPYrH47Q56xRVUBSPx2lTQL3tHu86Zxun/RgtsO7E8rH3evw93u0K7udt/t2y06an3KZgo/6x6fgxC8umHzAnHNoN7/Vzhjre9C407+12RCYI5OTlk3noKFlHcjmQnceB7GOvztfBnBPLh3LyyM7zkJ2bT05uPtm5HnLynNfsvHynPc9DEXKYOQ0pyv8E/pKQkKBJSUluhxHeDvwG7/aFvVug//vQuIfbERmX7c/OJS3zCNv3HSHjQA67Dxb8Ouq8Hshhf3beGT8nMkIoXyqKcrHOV2x0JKWiIo6/loqOpFT0sddIYiIjiI6MICpSiI4UoiIinNfICKIihOjICCIjhKgIISJCiBQhMkIQcY4VKd72CEGAiGOv4mxz7FUQIiKcV2f52GMH5PjjB5w2KbDuxPb
[base64-encoded PNG image data omitted: matplotlib output of df.plot(logy=True)]\n", 238 | "text/plain": [ 239 | "
" 240 | ] 241 | }, 242 | "metadata": { 243 | "needs_background": "light" 244 | }, 245 | "output_type": "display_data" 246 | } 247 | ], 248 | "source": [ 249 | "df.plot(logy=True)" 250 | ] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "Python 3", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 269 | "version": "3.8.2" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 4 274 | } 275 | -------------------------------------------------------------------------------- /nbs_scratchpad/CitiesIndia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "web_df = pd.read_html(\n", 21 | " \"https://en.wikipedia.org/wiki/List_of_cities_in_India_by_population\"\n", 22 | ")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "city_names = web_df[0][\"City\"].tolist()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "[\n", 44 | " \"Mumbai\",\n", 45 | " \"Delhi\",\n", 46 | " \"Bangalore\",\n", 47 | " \"Hyderabad\",\n", 48 | " \"Ahmedabad\",\n", 49 | " \"Chennai\",\n", 50 | " \"Kolkata\",\n", 51 | " \"Surat\",\n", 52 | " \"Pune\",\n", 53 | " \"Jaipur\",\n", 54 | " \"Visakhapatnama\",\n", 55 | " \"Kanpur\",\n", 56 | " \"Nagpur\",\n", 57 | " \"Lucknow\",\n", 58 | " \"Thane\",\n", 59 | " \"Bhopal\",\n", 60 | " \"Indore\",\n", 61 | " \"PimpriChinchwad\",\n", 62 | " \"Patna\",\n", 63 | " \"Vadodara\",\n", 64 | " \"Ghaziabad\",\n", 65 | " \"Ludhiana\",\n", 66 | " \"Agra\",\n", 67 | " \"Nashik\",\n", 68 | " \"Faridabad\",\n", 69 | " \"Meerut\",\n", 70 | " \"Rajkot\",\n", 71 | " \"KalyanDombivli\",\n", 72 | " \"VasaiVirar\",\n", 73 | " \"Varanasi\",\n", 74 | " \"Srinagar\",\n", 75 | " \"Aurangabad\",\n", 76 | " \"Dhanbad\",\n", 77 | " \"Amritsar\",\n", 78 | " \"NaviMumbai\",\n", 79 | " \"Allahabad\",\n", 80 | " \"Howrah\",\n", 81 | " \"Ranchi\",\n", 82 | " \"Gwalior\",\n", 83 | " \"Jabalpur\",\n", 84 | " \"Coimbatore\",\n", 85 | " \"Vijayawada\",\n", 86 | " \"Jodhpur\",\n", 87 | " \"Madurai\",\n", 88 | " \"Raipur\",\n", 89 | " \"Chandigarh\",\n", 90 | " \"Guntur\",\n", 91 | " \"Guwahati\",\n", 92 | " \"Solapur\",\n", 93 | " \"HubliDharwad\",\n", 94 | " \"Mysore\",\n", 95 | " \"Tiruchirappalli\",\n", 96 | " \"Bareilly\",\n", 97 | " \"Moradabad\",\n", 98 | " \"Tiruppur\",\n", 99 | " \"Gurgaon\",\n", 100 | " \"Aligarh\",\n", 101 | " \"Jalandhar\",\n", 102 | " \"Bhubaneswar\",\n", 103 | " \"Salem\",\n", 104 | " \"MiraBhayandar\",\n", 105 | " \"Warangal\",\n", 106 | " \"Jalgaon\",\n", 107 | " \"Kota\",\n", 108 | " \"Bhiwandi\",\n", 109 | " \"Saharanpur\",\n", 110 | " \"Gorakhpur\",\n", 111 | " \"Bikaner\",\n", 112 | " \"Amravati\",\n", 113 | " \"Noida\",\n", 114 | " \"Jamshedpur\",\n", 115 | " \"Bhilai\",\n", 116 
| " \"Cuttack\",\n", 117 | " \"Firozabad\",\n", 118 | " \"Kochi\",\n", 119 | " \"Nellore\",\n", 120 | " \"Bhavnagar\",\n", 121 | " \"Dehradun\",\n", 122 | " \"Durgapur\",\n", 123 | " \"Asansol\",\n", 124 | " \"Rourkela\",\n", 125 | " \"Nanded\",\n", 126 | " \"Kolhapur\",\n", 127 | " \"Ajmer\",\n", 128 | " \"Akola\",\n", 129 | " \"Gulbarga\",\n", 130 | " \"Jamnagar\",\n", 131 | " \"Ujjain\",\n", 132 | " \"Loni\",\n", 133 | " \"Siliguri\",\n", 134 | " \"Jhansi\",\n", 135 | " \"Ulhasnagar\",\n", 136 | " \"Jammu\",\n", 137 | " \"SangliMirajKupwad\",\n", 138 | " \"Mangalore\",\n", 139 | " \"Erode\",\n", 140 | " \"Belgaum\",\n", 141 | " \"Ambattur\",\n", 142 | " \"Tirunelveli\",\n", 143 | " \"Malegaon\",\n", 144 | " \"Gaya\",\n", 145 | " \"Thiruvananthapuram\",\n", 146 | " \"Udaipur\",\n", 147 | " \"Kakinada\",\n", 148 | " \"Davanagere\",\n", 149 | " \"Kozhikode\",\n", 150 | " \"Maheshtala\",\n", 151 | " \"RajpurSonarpur\",\n", 152 | " \"Rajahmundry\",\n", 153 | " \"Bokaro\",\n", 154 | " \"SouthDumdum\",\n", 155 | " \"Bellary\",\n", 156 | " \"Patiala\",\n", 157 | " \"Gopalpur\",\n", 158 | " \"Agartala\",\n", 159 | " \"Bhagalpur\",\n", 160 | " \"Muzaffarnagar\",\n", 161 | " \"Bhatpara\",\n", 162 | " \"Panihati\",\n", 163 | " \"Latur\",\n", 164 | " \"Dhule\",\n", 165 | " \"Tirupati\",\n", 166 | " \"Rohtak\",\n", 167 | " \"Sagar\",\n", 168 | " \"Korba\",\n", 169 | " \"Bhilwara\",\n", 170 | " \"Berhampur\",\n", 171 | " \"Muzaffarpur\",\n", 172 | " \"Ahmednagar\",\n", 173 | " \"Mathura\",\n", 174 | " \"Kollam\",\n", 175 | " \"Avadi\",\n", 176 | " \"Kadapa\",\n", 177 | " \"Kamarhati\",\n", 178 | " \"Sambalpur\",\n", 179 | " \"Bilaspur\",\n", 180 | " \"Shahjahanpur\",\n", 181 | " \"Satara\",\n", 182 | " \"Bijapur\",\n", 183 | " \"Kurnool\",\n", 184 | " \"Rampur\",\n", 185 | " \"Shimoga\",\n", 186 | " \"Chandrapur\",\n", 187 | " \"Junagadh\",\n", 188 | " \"Thrissur\",\n", 189 | " \"Alwar\",\n", 190 | " \"Bardhaman\",\n", 191 | " \"Kulti\",\n", 192 | " \"Nizamabad\",\n", 193 | " \"Parbhani\",\n", 194 | " \"Tumkur\",\n", 195 | " \"Khammam\",\n", 196 | " \"Ozhukarai\",\n", 197 | " \"BiharSharif\",\n", 198 | " \"Panipat\",\n", 199 | " \"Darbhanga\",\n", 200 | " \"Bally\",\n", 201 | " \"Aizawl\",\n", 202 | " \"Dewas\",\n", 203 | " \"Ichalkaranji\",\n", 204 | " \"Karnal\",\n", 205 | " \"Bathinda\",\n", 206 | " \"Jalna\",\n", 207 | " \"Eluru\",\n", 208 | " \"Barasat\",\n", 209 | " \"KirariSulemanNagar\",\n", 210 | " \"Purnia\",\n", 211 | " \"Satna\",\n", 212 | " \"Mau\",\n", 213 | " \"Sonipat\",\n", 214 | " \"Farrukhabad\",\n", 215 | " \"Durg\",\n", 216 | " \"Imphal\",\n", 217 | " \"Ratlam\",\n", 218 | " \"Hapur\",\n", 219 | " \"Arrah\",\n", 220 | " \"Anantapur\",\n", 221 | " \"Karimnagar\",\n", 222 | " \"Etawah\",\n", 223 | " \"Ambarnath\",\n", 224 | " \"NorthDumdum\",\n", 225 | " \"Bharatpur\",\n", 226 | " \"Begusarai\",\n", 227 | " \"NewDelhi\",\n", 228 | " \"Gandhidham\",\n", 229 | " \"Baranagar\",\n", 230 | " \"Tiruvottiyur\",\n", 231 | " \"Pondicherry\",\n", 232 | " \"Sikar\",\n", 233 | " \"Thoothukudi\",\n", 234 | " \"Rewa\",\n", 235 | " \"Mirzapur\",\n", 236 | " \"Raichur\",\n", 237 | " \"Pali\",\n", 238 | " \"Ramagundam\",\n", 239 | " \"Haridwar\",\n", 240 | " \"Vijayanagaram\",\n", 241 | " \"Tenali\",\n", 242 | " \"Nagercoil\",\n", 243 | " \"SriGanganagar\",\n", 244 | " \"KarawalNagar\",\n", 245 | " \"Mango\",\n", 246 | " \"Thanjavur\",\n", 247 | " \"Bulandshahr\",\n", 248 | " \"Uluberia\",\n", 249 | " \"Katni\",\n", 250 | " \"Sambhal\",\n", 251 | " \"Singrauli\",\n", 252 | " \"Nadiad\",\n", 253 | " 
\"Secunderabad\",\n", 254 | " \"Naihati\",\n", 255 | " \"Yamunanagar\",\n", 256 | " \"Bidhannagar\",\n", 257 | " \"Pallavaram\",\n", 258 | " \"Bidar\",\n", 259 | " \"Munger\",\n", 260 | " \"Panchkula\",\n", 261 | " \"Burhanpur\",\n", 262 | " \"RaurkelaIndustrialTownship\",\n", 263 | " \"Kharagpur\",\n", 264 | " \"Dindigul\",\n", 265 | " \"Gandhinagar\",\n", 266 | " \"Hospet\",\n", 267 | " \"NangloiJat\",\n", 268 | " \"Malda\",\n", 269 | " \"Ongole\",\n", 270 | " \"Deoghar\",\n", 271 | " \"Chapra\",\n", 272 | " \"Haldia\",\n", 273 | " \"Khandwa\",\n", 274 | " \"Nandyal\",\n", 275 | " \"Morena\",\n", 276 | " \"Amroha\",\n", 277 | " \"Anand\",\n", 278 | " \"Bhind\",\n", 279 | " \"BhalswaJahangirPur\",\n", 280 | " \"Madhyamgram\",\n", 281 | " \"Bhiwani\",\n", 282 | " \"Berhampore\",\n", 283 | " \"Ambala\",\n", 284 | " \"Morbi\",\n", 285 | " \"Fatehpur\",\n", 286 | " \"Raebareli\",\n", 287 | " \"Mahaboobnagar\",\n", 288 | " \"Chittoor\",\n", 289 | " \"Bhusawal\",\n", 290 | " \"Orai\",\n", 291 | " \"Bahraich\",\n", 292 | " \"Vellore\",\n", 293 | " \"Mehsana\",\n", 294 | " \"Raiganj\",\n", 295 | " \"Sirsa\",\n", 296 | " \"Danapur\",\n", 297 | " \"Serampore\",\n", 298 | " \"SultanPurMajra\",\n", 299 | " \"Guna\",\n", 300 | " \"Jaunpur\",\n", 301 | " \"Panvel\",\n", 302 | " \"Shivpuri\",\n", 303 | " \"SurendranagarDudhrej\",\n", 304 | " \"Unnao\",\n", 305 | " \"Chinsurah\",\n", 306 | " \"Alappuzha\",\n", 307 | " \"Kottayam\",\n", 308 | " \"Machilipatnam\",\n", 309 | " \"Shimla\",\n", 310 | " \"Adoni\",\n", 311 | " \"Udupi\",\n", 312 | " \"Katihar\",\n", 313 | " \"Proddatur\",\n", 314 | " \"Saharsa\",\n", 315 | " \"Hindupur\",\n", 316 | " \"Sasaram\",\n", 317 | " \"Hajipur\",\n", 318 | " \"Bhimavaram\",\n", 319 | " \"Kumbakonam\",\n", 320 | " \"Dehri\",\n", 321 | " \"Madanapalle\",\n", 322 | " \"Siwan\",\n", 323 | " \"Bettiah\",\n", 324 | " \"Guntakal\",\n", 325 | " \"Srikakulam\",\n", 326 | " \"Motihari\",\n", 327 | " \"Dharmavaram\",\n", 328 | " \"Gudivada\",\n", 329 | " \"Phagwara\",\n", 330 | " \"Narasaraopet\",\n", 331 | " \"Suryapet\",\n", 332 | " \"Miryalaguda\",\n", 333 | " \"Tadipatri\",\n", 334 | " \"Karaikudi\",\n", 335 | " \"Kishanganj\",\n", 336 | " \"Jamalpur\",\n", 337 | " \"Ballia\",\n", 338 | " \"Kavali\",\n", 339 | " \"Tadepalligudem\",\n", 340 | " \"Amaravati\",\n", 341 | " \"Buxar\",\n", 342 | " \"Jehanabad\",\n", 343 | " \"Aurangabad\",\n", 344 | " \"Gangtok\"\n", 345 | "]\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "def clean_numbers(city_name: str):\n", 351 | " return \"\".join([i for i in city_name if i.isalpha()])\n", 352 | "\n", 353 | "\n", 354 | "clean_city_names = [clean_numbers(city) for city in city_names]\n", 355 | "print(json.dumps(clean_city_names, indent=2))" 356 | ] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.7.5" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } 381 | -------------------------------------------------------------------------------- /nbs_scratchpad/DataTransforms_JHUDataFormat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | 
"execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import random\n", 11 | "from pathlib import Path\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "\n", 15 | "%load_ext autoreload\n", 16 | "%autoreload 2\n", 17 | "Path.ls = lambda x: list(x.iterdir())\n", 18 | "# from urllib.parse import quote_plus" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import urllib.request, json\n", 28 | "from urllib.error import HTTPError\n", 29 | "\n", 30 | "# def get_raw_data(raw_web_url=\"https://api.covid19india.org/raw_data.json\"):\n", 31 | "# with urllib.request.urlopen(raw_web_url) as url:\n", 32 | "# data_dict = json.loads(url.read().decode())\n", 33 | "# return data_dict[\"raw_data\"]\n", 34 | "\n", 35 | "\n", 36 | "def get_stats_history(stats_history_url=\"https://api.rootnet.in/covid19-in/stats/history\"):\n", 37 | " try:\n", 38 | " with urllib.request.urlopen(stats_history_url) as url:\n", 39 | " data_dict = json.loads(url.read().decode())\n", 40 | " return data_dict\n", 41 | " except HTTPError as e:\n", 42 | " print(f\"Using local backup of {stats_history_url}\")\n", 43 | " with open(\"history.json\") as f:\n", 44 | " return json.loads(f.read())\n", 45 | " \n", 46 | "# def get_state_data(\n", 47 | "# case_count_url=\"https://api.covid19india.org/state_district_wise.json\",\n", 48 | "# ):\n", 49 | "# with urllib.request.urlopen(case_count_url) as url:\n", 50 | "# data_dict = json.loads(url.read().decode())\n", 51 | "# return data_dict\n", 52 | "\n", 53 | "\n", 54 | "# def get_case_count(data, state=\"Karnataka\"):\n", 55 | "# df = pd.DataFrame(data[state][\"districtData\"])\n", 56 | "# df = df.transpose()\n", 57 | "# df.reset_index(inplace=True)\n", 58 | "# df.rename(columns={\"confirmed\": \"CaseCount\", \"index\": \"District\"}, inplace=True)\n", 59 | "# df.drop(columns=[\"lastupdatedtime\"], inplace=True)\n", 60 | "# return df" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Using local backup of https://api.rootnet.in/covid19-in/stats/history\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "from typing import List, Dict\n", 78 | "stats:List = get_stats_history()[\"data\"]" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def get_date_df(stat: Dict) -> pd.DataFrame:\n", 88 | " day = stat[\"day\"]\n", 89 | "# print(stat.keys())\n", 90 | " regional = stat[\"regional\"]\n", 91 | " elements = [{\"Day\":day, \"Province/State\":region[\"loc\"], \"CaseCount\": region[\"confirmedCasesIndian\"]+region[\"confirmedCasesForeign\"]} for region in regional]\n", 92 | " return pd.DataFrame(elements)\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 5, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "regionwise_df = [get_date_df(stat) for stat in stats]\n", 102 | "regionwise_df = pd.concat(regionwise_df)\n", 103 | "regionwise_df.reset_index(inplace=True)\n", 104 | "regionwise_df.drop(columns=[\"index\"], inplace=True)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/plain": [ 115 | "Index(['Day', 'Province/State', 'CaseCount'], dtype='object')" 116 | ] 117 | }, 118 | "execution_count": 
6, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "regionwise_df.columns" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Transform to the JHU data format\n", 132 | "See this [Github link to data](https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv) for reference" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "pd.set_option(\"display.max_rows\", 500)\n", 142 | "datewise_pivot_df = pd.pivot_table(\n", 143 | " regionwise_df,\n", 144 | " index=[\"Province/State\"],\n", 145 | " values=[\"CaseCount\"],\n", 146 | " columns=[\"Day\"],\n", 147 | " aggfunc=sum,\n", 148 | ").fillna(0)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "datewise_pivot_df.columns = [\n", 158 | " element[1] for element in datewise_pivot_df.columns.to_list()\n", 159 | "]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "Index(['2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14',\n", 171 | " '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19',\n", 172 | " '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24',\n", 173 | " '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28'],\n", 174 | " dtype='object')" 175 | ] 176 | }, 177 | "execution_count": 9, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "datewise_pivot_df.columns" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 10, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "datewise_pivot_df.insert(0, \"Country/Region\", \"India\")\n", 193 | "datewise_pivot_df.insert(1, \"Lat\", 20)\n", 194 | "datewise_pivot_df.insert(2, \"Long\", 70)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 11, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
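To make the reshape above concrete: pd.pivot_table turns the long frame, one row per (Province/State, Day), into the wide JHU layout, one row per state with a column per day. Here is a self-contained sketch of the same steps on a toy frame; the numbers are made up, while the notebook pulls real counts from api.rootnet.in:

    import pandas as pd

    long_df = pd.DataFrame({
        "Province/State": ["Kerala", "Kerala", "Delhi", "Delhi"],
        "Day": ["2020-03-10", "2020-03-11", "2020-03-10", "2020-03-11"],
        "CaseCount": [9, 17, 4, 5],
    })

    # One row per state, one column per day; states with no report become 0.
    wide_df = pd.pivot_table(
        long_df,
        index=["Province/State"],
        values=["CaseCount"],
        columns=["Day"],
        aggfunc=sum,
    ).fillna(0)

    # Flatten the ("CaseCount", day) MultiIndex columns to plain day strings,
    # then prepend the fixed JHU columns, as the notebook does.
    wide_df.columns = [day for _, day in wide_df.columns.to_list()]
    wide_df.insert(0, "Country/Region", "India")
    wide_df.insert(1, "Lat", 20)
    wide_df.insert(2, "Long", 70)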
\n", 206 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | "
Country/RegionLatLong2020-03-102020-03-112020-03-122020-03-132020-03-142020-03-152020-03-16...2020-03-192020-03-202020-03-212020-03-222020-03-232020-03-242020-03-252020-03-262020-03-272020-03-28
Province/State
Andaman and Nicobar IslandsIndia20700.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.01.06.0
Andhra PradeshIndia20700.00.00.01.01.01.01.0...2.03.03.05.07.08.09.011.012.014.0
BiharIndia20700.00.00.00.00.00.00.0...0.00.00.02.02.03.04.06.06.09.0
ChandigarhIndia20700.00.00.00.00.00.00.0...1.01.01.05.06.07.07.07.07.08.0
ChhattisgarhIndia20700.00.00.00.00.00.00.0...1.01.01.01.01.01.01.06.06.06.0
\n", 393 | "

5 rows × 22 columns

\n", 394 | "
" 395 | ], 396 | "text/plain": [ 397 | " Country/Region Lat Long 2020-03-10 2020-03-11 \\\n", 398 | "Province/State \n", 399 | "Andaman and Nicobar Islands India 20 70 0.0 0.0 \n", 400 | "Andhra Pradesh India 20 70 0.0 0.0 \n", 401 | "Bihar India 20 70 0.0 0.0 \n", 402 | "Chandigarh India 20 70 0.0 0.0 \n", 403 | "Chhattisgarh India 20 70 0.0 0.0 \n", 404 | "\n", 405 | " 2020-03-12 2020-03-13 2020-03-14 2020-03-15 \\\n", 406 | "Province/State \n", 407 | "Andaman and Nicobar Islands 0.0 0.0 0.0 0.0 \n", 408 | "Andhra Pradesh 0.0 1.0 1.0 1.0 \n", 409 | "Bihar 0.0 0.0 0.0 0.0 \n", 410 | "Chandigarh 0.0 0.0 0.0 0.0 \n", 411 | "Chhattisgarh 0.0 0.0 0.0 0.0 \n", 412 | "\n", 413 | " 2020-03-16 ... 2020-03-19 2020-03-20 \\\n", 414 | "Province/State ... \n", 415 | "Andaman and Nicobar Islands 0.0 ... 0.0 0.0 \n", 416 | "Andhra Pradesh 1.0 ... 2.0 3.0 \n", 417 | "Bihar 0.0 ... 0.0 0.0 \n", 418 | "Chandigarh 0.0 ... 1.0 1.0 \n", 419 | "Chhattisgarh 0.0 ... 1.0 1.0 \n", 420 | "\n", 421 | " 2020-03-21 2020-03-22 2020-03-23 2020-03-24 \\\n", 422 | "Province/State \n", 423 | "Andaman and Nicobar Islands 0.0 0.0 0.0 0.0 \n", 424 | "Andhra Pradesh 3.0 5.0 7.0 8.0 \n", 425 | "Bihar 0.0 2.0 2.0 3.0 \n", 426 | "Chandigarh 1.0 5.0 6.0 7.0 \n", 427 | "Chhattisgarh 1.0 1.0 1.0 1.0 \n", 428 | "\n", 429 | " 2020-03-25 2020-03-26 2020-03-27 2020-03-28 \n", 430 | "Province/State \n", 431 | "Andaman and Nicobar Islands 0.0 1.0 1.0 6.0 \n", 432 | "Andhra Pradesh 9.0 11.0 12.0 14.0 \n", 433 | "Bihar 4.0 6.0 6.0 9.0 \n", 434 | "Chandigarh 7.0 7.0 7.0 8.0 \n", 435 | "Chhattisgarh 1.0 6.0 6.0 6.0 \n", 436 | "\n", 437 | "[5 rows x 22 columns]" 438 | ] 439 | }, 440 | "execution_count": 11, 441 | "metadata": {}, 442 | "output_type": "execute_result" 443 | } 444 | ], 445 | "source": [ 446 | "datewise_pivot_df.head()" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 12, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "datewise_pivot_df.to_csv(\"time_series_covid19_confirmed_India_20200329_2.csv\", header=True)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 13, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "Province/State,Country/Region,Lat,Long,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28\r\n", 468 | "Andaman and Nicobar Islands,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,6.0\r\n", 469 | "Andhra Pradesh,India,20,70,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,5.0,7.0,8.0,9.0,11.0,12.0,14.0\r\n", 470 | "Bihar,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,6.0,9.0\r\n", 471 | "Chandigarh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,5.0,6.0,7.0,7.0,7.0,7.0,8.0\r\n", 472 | "Chhattisgarh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,6.0,6.0\r\n", 473 | "Delhi,India,20,70,4.0,5.0,6.0,7.0,7.0,7.0,7.0,8.0,10.0,12.0,17.0,26.0,29.0,29.0,30.0,31.0,36.0,36.0,39.0\r\n", 474 | "Goa,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0\r\n", 475 | "Gujarat,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,7.0,18.0,29.0,33.0,38.0,43.0,43.0,45.0\r\n", 476 | 
"Haryana,India,20,70,14.0,14.0,14.0,14.0,14.0,14.0,14.0,15.0,17.0,17.0,17.0,17.0,21.0,26.0,28.0,28.0,30.0,30.0,33.0\r\n", 477 | "Himachal Pradesh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0\r\n", 478 | "Jammu and Kashmir,India,20,70,1.0,1.0,1.0,1.0,2.0,2.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,7.0,13.0,13.0,20.0\r\n", 479 | "Karnataka,India,20,70,1.0,4.0,4.0,6.0,6.0,6.0,6.0,11.0,11.0,14.0,15.0,15.0,26.0,33.0,37.0,41.0,55.0,55.0,55.0\r\n", 480 | "Kerala,India,20,70,9.0,17.0,17.0,19.0,19.0,22.0,23.0,26.0,27.0,27.0,28.0,40.0,52.0,67.0,95.0,109.0,118.0,137.0,176.0\r\n", 481 | "Ladakh,India,20,70,2.0,2.0,3.0,3.0,3.0,3.0,4.0,6.0,8.0,8.0,10.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0\r\n", 482 | "Madhya Pradesh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,6.0,7.0,14.0,20.0,20.0,30.0\r\n", 483 | "Maharashtra,India,20,70,2.0,2.0,11.0,14.0,14.0,32.0,32.0,39.0,42.0,47.0,52.0,63.0,67.0,74.0,89.0,128.0,124.0,130.0,180.0\r\n", 484 | "Manipur,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0\r\n", 485 | "Mizoram,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0\r\n", 486 | "Odisha,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0\r\n", 487 | "Puducherry,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0\r\n", 488 | "Punjab,India,20,70,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,13.0,21.0,21.0,29.0,29.0,33.0,33.0,38.0\r\n", 489 | "Rajasthan,India,20,70,2.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,7.0,17.0,17.0,24.0,28.0,32.0,36.0,41.0,41.0,54.0\r\n", 490 | "Tamil Nadu,India,20,70,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,7.0,9.0,15.0,18.0,26.0,29.0,40.0\r\n", 491 | "Telengana,India,20,70,1.0,1.0,1.0,1.0,1.0,3.0,3.0,5.0,6.0,6.0,17.0,21.0,22.0,32.0,35.0,35.0,44.0,45.0,56.0\r\n", 492 | "Uttar Pradesh,India,20,70,9.0,9.0,11.0,11.0,12.0,13.0,13.0,15.0,16.0,19.0,23.0,24.0,27.0,31.0,33.0,37.0,41.0,41.0,55.0\r\n", 493 | "Uttarakhand,India,20,70,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,3.0,3.0,4.0,4.0,5.0,5.0,5.0\r\n", 494 | "West Bengal,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0,4.0,7.0,9.0,9.0,10.0,10.0,15.0\r\n" 495 | ] 496 | } 497 | ], 498 | "source": [ 499 | "!cat time_series_covid19_confirmed_India_20200329_2.csv" 500 | ] 501 | } 502 | ], 503 | "metadata": { 504 | "kernelspec": { 505 | "display_name": "Python 3", 506 | "language": "python", 507 | "name": "python3" 508 | }, 509 | "language_info": { 510 | "codemirror_mode": { 511 | "name": "ipython", 512 | "version": 3 513 | }, 514 | "file_extension": ".py", 515 | "mimetype": "text/x-python", 516 | "name": "python", 517 | "nbconvert_exporter": "python", 518 | "pygments_lexer": "ipython3", 519 | "version": "3.7.5" 520 | } 521 | }, 522 | "nbformat": 4, 523 | "nbformat_minor": 2 524 | } 525 | -------------------------------------------------------------------------------- /nbs_scratchpad/States-BedCounts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "url = \"https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Requirement already satisfied: lxml in 
/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages (4.5.0)\r\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "import json\n", 27 | "import random\n", 28 | "from pathlib import Path\n", 29 | "\n", 30 | "# !pip install camelot\n", 31 | "import camelot\n", 32 | "\n", 33 | "!pip install lxml\n", 34 | "import pandas as pd\n", 35 | "\n", 36 | "%load_ext autoreload\n", 37 | "%autoreload 2\n", 38 | "Path.ls = lambda x: list(x.iterdir())" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "df = pd.read_html(url, header=None)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/html": [ 58 | "
\n", 59 | "\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
012345
0States/UTsRural hospitalsRural hospitalsUrban hospitalsUrban hospitalsAs on
1NaNNo.BedsNo.BedsAs on
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " 0 1 2 3 \\\n", 109 | "0 States/UTs Rural hospitals Rural hospitals Urban hospitals \n", 110 | "1 NaN No. Beds No. \n", 111 | "\n", 112 | " 4 5 \n", 113 | "0 Urban hospitals As on \n", 114 | "1 Beds As on " 115 | ] 116 | }, 117 | "execution_count": 4, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "govt_hospitals = df[1]\n", 124 | "govt_hospitals[:2]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "headers = [\"State\", \"RuralHospitalsCount\", \"RuralBeds\", \"UrbanHospitals\", \"UrbanBeds\", \"Date\"]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "['State',\n", 145 | " 'RuralHospitalsCount',\n", 146 | " 'RuralBeds',\n", 147 | " 'UrbanHospitals',\n", 148 | " 'UrbanBeds',\n", 149 | " 'Date']" 150 | ] 151 | }, 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "headers" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 7, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "govt_hospitals.columns = headers" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "gvt_hosp_df = govt_hospitals[2:]" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 9, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "gvt_hosp_df.reset_index(drop=True, inplace=True)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 10, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from dateutil.parser import parse\n", 195 | "\n", 196 | "def date_parser(date: str):\n", 197 | " \"\"\"\n", 198 | " Argument:\n", 199 | " date(str): Input string\n", 200 | " \n", 201 | " Returns:\n", 202 | " dateutil object\n", 203 | " \"\"\"\n", 204 | " try:\n", 205 | " date = str(date)\n", 206 | " return parse(date)\n", 207 | " except:\n", 208 | " return None\n", 209 | " \n", 210 | "# gvt_hosp_df[\"Date\"] = gvt_hosp_df[\"Date\"].apply(parse)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 11, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stderr", 220 | "output_type": "stream", 221 | "text": [ 222 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/pandas/core/indexing.py:670: SettingWithCopyWarning: \n", 223 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 224 | "\n", 225 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 226 | " self._setitem_with_indexer(indexer, value)\n", 227 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 228 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 229 | "\n", 230 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 231 | " \"\"\"Entry point for launching an IPython kernel.\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "gvt_hosp_df.loc[36, \"Date\"] = \"01.01.2016\"" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | 
"execution_count": 12, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 249 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 250 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 251 | "\n", 252 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 253 | " \"\"\"Entry point for launching an IPython kernel.\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "gvt_hosp_df[\"Date\"] = gvt_hosp_df[\"Date\"].apply(parse)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 13, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stderr", 268 | "output_type": "stream", 269 | "text": [ 270 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 271 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 272 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 273 | "\n", 274 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 275 | " \"\"\"Entry point for launching an IPython kernel.\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "gvt_hosp_df[\"Source\"] = \"https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\"" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 14, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stderr", 290 | "output_type": "stream", 291 | "text": [ 292 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 293 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 294 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 295 | "\n", 296 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 297 | " \"\"\"Entry point for launching an IPython kernel.\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "gvt_hosp_df[\"State\"] = gvt_hosp_df[\"State\"].apply(lambda x: x.replace(\"*\", \"\"))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 29, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "gvt_hosp_df.to_csv(\"Hospitals.csv\", index=False)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 31, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stdout", 321 | "output_type": "stream", 322 | "text": [ 323 | "State,RuralHospitalsCount,RuralBeds,UrbanHospitals,UrbanBeds,Date,Source\r\n", 324 | "Andhra Pradesh,193,6480,65,16658,2017-01-01 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 325 | "Arunachal Pradesh,208,2136,10,268,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 326 | "Assam,1176,10944,50,6198,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 327 | "Bihar,930,6083,103,5936,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 328 | "Chhattisgarh,169,5070,45,4342,2016-01-01 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 329 | "Goa,17,1405,25,1608,2017-12-31 
00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 330 | "Gujarat,364,11715,122,20565,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 331 | "Haryana,609,6690,59,4550,2016-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n", 332 | "Himachal Pradesh,705,5665,96,6734,2017-12-31 00:00:00,https://pib.gov.in/PressReleasePage.aspx?PRID=1539877\r\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "!head Hospitals.csv" 338 | ] 339 | } 340 | ], 341 | "metadata": { 342 | "kernelspec": { 343 | "display_name": "Python 3", 344 | "language": "python", 345 | "name": "python3" 346 | }, 347 | "language_info": { 348 | "codemirror_mode": { 349 | "name": "ipython", 350 | "version": 3 351 | }, 352 | "file_extension": ".py", 353 | "mimetype": "text/x-python", 354 | "name": "python", 355 | "nbconvert_exporter": "python", 356 | "pygments_lexer": "ipython3", 357 | "version": "3.7.5" 358 | } 359 | }, 360 | "nbformat": 4, 361 | "nbformat_minor": 2 362 | } 363 | -------------------------------------------------------------------------------- /nbs_scratchpad/TestingBiasIndiaCaseCount.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 16, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "The autoreload extension is already loaded. To reload it, use:\n", 13 | " %reload_ext autoreload\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import json\n", 19 | "import random\n", 20 | "from datetime import datetime\n", 21 | "from pathlib import Path\n", 22 | "\n", 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from fbprophet import Prophet\n", 26 | "\n", 27 | "%load_ext autoreload\n", 28 | "%autoreload 2\n", 29 | "Path.ls = lambda x: list(x.iterdir())" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import json\n", 39 | "from urllib.request import urlopen\n", 40 | "\n", 41 | "from pandas import json_normalize\n", 42 | "\n", 43 | "response = urlopen(\"https://api.covid19india.org/data.json\")\n", 44 | "json_data = json.loads(response.read().decode(\"utf-8\", \"replace\"))\n", 45 | "# print(json_data.keys())\n", 46 | "case_count_dict = json_normalize(json_data[\"cases_time_series\"])\n", 47 | "case_count_df = pd.DataFrame(case_count_dict)\n", 48 | "tested_dict = json_normalize(json_data[\"tested\"])\n", 49 | "tested_df = pd.DataFrame(tested_dict)\n", 50 | "# case_count_df, tested_df" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "tested_df = tested_df[[\"totalsamplestested\", \"updatetimestamp\"]]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 7, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.8/site-packages/pandas/core/generic.py:5303: SettingWithCopyWarning:\n", 72 | "\n", 73 | "\n", 74 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 75 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 76 | "\n", 77 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 78 | "\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "from 
dateutil.parser import parse\n", 84 | "tested_df.updatetimestamp = tested_df.updatetimestamp.apply(parse)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 21, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.8/site-packages/pandas/core/generic.py:6746: SettingWithCopyWarning:\n", 97 | "\n", 98 | "\n", 99 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 100 | "\n", 101 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 102 | "\n", 103 | ":2: SettingWithCopyWarning:\n", 104 | "\n", 105 | "\n", 106 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 107 | "\n", 108 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 109 | "\n", 110 | "/Users/nirant/miniconda3/envs/nanda/lib/python3.8/site-packages/pandas/core/generic.py:5303: SettingWithCopyWarning:\n", 111 | "\n", 112 | "\n", 113 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 114 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 115 | "\n", 116 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 117 | "\n" 118 | ] 119 | }, 120 | { 121 | "data": { 122 | "text/html": [ 123 | "
\n", 124 | "\n", 137 | "\n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | "
totalsamplestestedupdatetimestamp
065002020-03-13 00:00:00
1131252020-03-18 18:00:00
2133162020-03-19 10:00:00
3141752020-03-19 18:00:00
4143762020-03-20 10:00:00
5154042020-03-20 18:00:00
6157012020-03-21 10:00:00
7169112020-03-21 18:00:00
8169992020-03-22 10:00:00
9181272020-03-22 18:00:00
10183832020-03-23 10:00:00
11207072020-03-23 20:00:00
12208642020-03-24 10:00:00
13226942020-03-24 20:00:00
14229282020-03-25 10:00:00
15251442020-03-25 20:00:00
18276882020-03-27 09:00:00
19384422020-03-30 00:00:00
20427882020-03-31 00:00:00
21479512020-01-04 00:00:00
22558512020-02-04 21:00:00
23566802020-03-04 09:00:00
24692452020-03-04 21:00:00
25799502020-04-04 21:00:00
26895342020-05-04 21:00:00
\n", 273 | "
" 274 | ], 275 | "text/plain": [ 276 | " totalsamplestested updatetimestamp\n", 277 | "0 6500 2020-03-13 00:00:00\n", 278 | "1 13125 2020-03-18 18:00:00\n", 279 | "2 13316 2020-03-19 10:00:00\n", 280 | "3 14175 2020-03-19 18:00:00\n", 281 | "4 14376 2020-03-20 10:00:00\n", 282 | "5 15404 2020-03-20 18:00:00\n", 283 | "6 15701 2020-03-21 10:00:00\n", 284 | "7 16911 2020-03-21 18:00:00\n", 285 | "8 16999 2020-03-22 10:00:00\n", 286 | "9 18127 2020-03-22 18:00:00\n", 287 | "10 18383 2020-03-23 10:00:00\n", 288 | "11 20707 2020-03-23 20:00:00\n", 289 | "12 20864 2020-03-24 10:00:00\n", 290 | "13 22694 2020-03-24 20:00:00\n", 291 | "14 22928 2020-03-25 10:00:00\n", 292 | "15 25144 2020-03-25 20:00:00\n", 293 | "18 27688 2020-03-27 09:00:00\n", 294 | "19 38442 2020-03-30 00:00:00\n", 295 | "20 42788 2020-03-31 00:00:00\n", 296 | "21 47951 2020-01-04 00:00:00\n", 297 | "22 55851 2020-02-04 21:00:00\n", 298 | "23 56680 2020-03-04 09:00:00\n", 299 | "24 69245 2020-03-04 21:00:00\n", 300 | "25 79950 2020-04-04 21:00:00\n", 301 | "26 89534 2020-05-04 21:00:00" 302 | ] 303 | }, 304 | "execution_count": 21, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "tested_df[\"totalsamplestested\"].replace(\"\", np.nan, inplace=True)\n", 311 | "tested_df.dropna(subset=[\"totalsamplestested\"], inplace=True)\n", 312 | "tested_df.totalsamplestested = tested_df.totalsamplestested.apply(int)\n", 313 | "tested_df" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 22, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/plain": [ 324 | "0 NaN\n", 325 | "1 6625.0\n", 326 | "2 191.0\n", 327 | "3 859.0\n", 328 | "4 201.0\n", 329 | "5 1028.0\n", 330 | "6 297.0\n", 331 | "7 1210.0\n", 332 | "8 88.0\n", 333 | "9 1128.0\n", 334 | "10 256.0\n", 335 | "11 2324.0\n", 336 | "12 157.0\n", 337 | "13 1830.0\n", 338 | "14 234.0\n", 339 | "15 2216.0\n", 340 | "18 2544.0\n", 341 | "19 10754.0\n", 342 | "20 4346.0\n", 343 | "21 5163.0\n", 344 | "22 7900.0\n", 345 | "23 829.0\n", 346 | "24 12565.0\n", 347 | "25 10705.0\n", 348 | "26 9584.0\n", 349 | "Name: totalsamplestested, dtype: float64" 350 | ] 351 | }, 352 | "execution_count": 22, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "tested_df.totalsamplestested.diff()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 23, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "import matplotlib.pyplot as plt" 368 | ] 369 | } 370 | ], 371 | "metadata": { 372 | "kernelspec": { 373 | "display_name": "Python 3", 374 | "language": "python", 375 | "name": "python3" 376 | }, 377 | "language_info": { 378 | "codemirror_mode": { 379 | "name": "ipython", 380 | "version": 3 381 | }, 382 | "file_extension": ".py", 383 | "mimetype": "text/x-python", 384 | "name": "python", 385 | "nbconvert_exporter": "python", 386 | "pygments_lexer": "ipython3", 387 | "version": "3.8.2" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 4 392 | } 393 | -------------------------------------------------------------------------------- /nbs_scratchpad/TravelHistory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import urllib.request, json\n", 10 | "\n", 11 | "with urllib.request.urlopen(\n", 12 | " 
\"https://api.steinhq.com/v1/storages/5e736c1db88d3d04ae0815b3/Raw_Data\"\n", 13 | ") as url:\n", 14 | " data = json.loads(url.read().decode())\n", 15 | " " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "/Users/meghanabhange/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tqdm/std.py:658: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", 28 | " from pandas import Panel\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import pandas as pd\n", 34 | "import re\n", 35 | "from tqdm import tqdm\n", 36 | "tqdm.pandas()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "df = pd.DataFrame(data)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "30 Travelled from Thailand and Malaysia\n", 57 | "31 Travelled from Iran\n", 58 | "32 Travelled from Iran\n", 59 | "33 Travelled from Oman\n", 60 | "34 Travelled from Italy on 29/02/2020 through Doha\n", 61 | "Name: Notes, dtype: object" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "df[\"Notes\"][30:35]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "import spacy\n", 80 | "nlp = spacy.load(\"en_core_web_sm\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 6, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from spacy.tokens import Span\n", 90 | "\n", 91 | "def get_travel_status(span):\n", 92 | " if span.label_ ==\"GPE\":\n", 93 | " prev_token = span.doc[span.start - 1]\n", 94 | " if prev_token.text in (\"from\", \"through\", \"via\", \"Via\"):\n", 95 | " return(\"from\")\n", 96 | " elif prev_token.text in (\"to\", \"and\"):\n", 97 | " return(\"to\")\n", 98 | " return \"to\"\n", 99 | "\n", 100 | "# Register the Span extension as 'travel_status'\n", 101 | "Span.set_extension(\"travel_status\", getter=get_travel_status, force=True)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "[('Indian', 'Indian')]\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "from spacy.tokens import Span, Token\n", 119 | "\n", 120 | "def get_nat(span):\n", 121 | " if span.label_ ==\"NORP\":\n", 122 | " return span.text\n", 123 | "\n", 124 | "# Register the Span extension as 'nationality'\n", 125 | "Span.set_extension(\"nationality\", getter=get_nat, force=True)\n", 126 | "\n", 127 | "doc = nlp(\"Indian Tourist\")\n", 128 | "print([(ent.text, ent._.nationality) for ent in doc.ents])" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "from spacy.matcher import Matcher" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "[('friend', None), ('and', None), ('family', None), ('of', 'friend and family'), ('p23', None)]\n" 150 | ] 
151 | } 152 | ], 153 | "source": [ 154 | "def get_rel(token):\n", 155 | " if token.text == \"of\":\n", 156 | " prev_token = token.doc[token.i - 1]\n", 157 | " prev2 = None\n", 158 | " if token.i > 2:\n", 159 | " prev2 = token.doc[token.i - 2]\n", 160 | " if prev2.text.lower() == \"and\":\n", 161 | " return f\"{token.doc[token.i - 3]} {token.doc[token.i - 2]} {token.doc[token.i - 1]}\"\n", 162 | " if prev_token.text.lower() in (\"members\", \"member\"):\n", 163 | " return \"Family Member\"\n", 164 | " else:\n", 165 | " return prev_token.text\n", 166 | "\n", 167 | "\n", 168 | "# Register the Span extension as 'relationship'\n", 169 | "Token.set_extension(\"relationship\", getter=get_rel, force=True)\n", 170 | "\n", 171 | "doc = nlp(\"friend and family of p23\")\n", 172 | "print([(ent.text, ent._.relationship) for ent in doc])" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 10, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "def extract_relationship(sent):\n", 182 | " if not sent:\n", 183 | " return []\n", 184 | " s = re.sub(r'[^\\w\\s]',' ',sent)\n", 185 | " doc = nlp(s)\n", 186 | " for tok in doc:\n", 187 | " if tok._.relationship:\n", 188 | " return(tok._.relationship)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 11, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "def extract_travel_place(sent):\n", 198 | " if not sent:\n", 199 | " return []\n", 200 | " s = re.sub(r'[^\\w\\s]',' ',sent)\n", 201 | " doc = nlp(s)\n", 202 | " travel = []\n", 203 | " for ent in doc.ents:\n", 204 | " if ent._.travel_status:\n", 205 | " travel.append(ent.text)\n", 206 | " return travel" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 12, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "def extract_nationality(sent):\n", 216 | " if not sent:\n", 217 | " return []\n", 218 | " s = re.sub(r'[^\\w\\s]',' ',sent)\n", 219 | " doc = nlp(s)\n", 220 | " nat = []\n", 221 | " for ent in doc.ents:\n", 222 | " if ent._.nationality:\n", 223 | " nat.append(ent._.nationality)\n", 224 | " return nat" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 13, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "import urllib.request, json\n", 234 | "\n", 235 | "with urllib.request.urlopen(\n", 236 | " \"https://raw.githubusercontent.com/bhanuc/indian-list/master/state-city.json\"\n", 237 | ") as url:\n", 238 | " state_city = json.loads(url.read().decode())\n", 239 | "\n", 240 | " " 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "l = [\"India\", \"Mumbai\"]\n", 250 | "for k, v in state_city.items():\n", 251 | " l.append(k)\n", 252 | " l = l+v" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 15, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "l= [ele.replace(\"*\", \"\") for ele in l]" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 16, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "[False, True, True, True, False]" 273 | ] 274 | }, 275 | "execution_count": 16, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "def extract_foreign(sent):\n", 282 | " if not sent:\n", 283 | " return []\n", 284 | " s = re.sub(r'[^\\w\\s]',' ',sent)\n", 285 | " doc = nlp(s)\n", 286 | " is_foreign 
= []\n", 287 | " for ent in doc.ents:\n", 288 | " if ent.label_==\"GPE\":\n", 289 | " is_foreign.append(not(ent.text in l))\n", 290 | " return is_foreign\n", 291 | "\n", 292 | "extract_foreign(\"India, China Italy, Japan, Pune, 1989 mountains Apple Meghana\")" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 17, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "import re\n", 302 | "def find_travelled(data):\n", 303 | " df = data.copy()\n", 304 | " df[\"Relationship\"] = df[\"Notes\"].progress_apply(extract_relationship)\n", 305 | " df[\"Travel Place\"] = df[\"Notes\"].progress_apply(extract_travel_place)\n", 306 | " df[\"Nationality\"] = df[\"Notes\"].progress_apply(extract_nationality)\n", 307 | " df[\"is_foreign\"] = df[\"Notes\"].progress_apply(extract_foreign)\n", 308 | " return df\n", 309 | " " 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 18, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "name": "stderr", 319 | "output_type": "stream", 320 | "text": [ 321 | "100%|██████████| 1040/1040 [00:03<00:00, 327.28it/s]\n", 322 | "100%|██████████| 1040/1040 [00:03<00:00, 330.57it/s]\n", 323 | "100%|██████████| 1040/1040 [00:03<00:00, 334.87it/s]\n", 324 | "100%|██████████| 1040/1040 [00:03<00:00, 338.28it/s]\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "find_travelled(df).to_csv(\"rel.csv\")" 330 | ] 331 | } 332 | ], 333 | "metadata": { 334 | "kernelspec": { 335 | "display_name": "Python 3", 336 | "language": "python", 337 | "name": "python3" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.7.3" 350 | } 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 4 354 | } 355 | -------------------------------------------------------------------------------- /nbs_scratchpad/VisualizeTravelHistory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import geopandas\n", 11 | "import geopatra" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "raw_df = pd.read_json(\n", 21 | " \"https://api.steinhq.com/v1/storages/5e737e60b88d3d04ae0815b8/Sheet1\"\n", 22 | ")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "df = raw_df[[\"Patient\", \"PlaceName\", \"Lat\", \"Long\"]]" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df.dropna(inplace=True)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.Long, df.Lat))\n", 50 | "gdf.folium.plot(tiles=\"google\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 8, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "gdf = geopandas.GeoDataFrame(\n", 60 | " df, geometry=geopandas.points_from_xy(df.Long, df.Lat))\n", 61 | "m = gdf.folium.plot(tiles=\"google\")" 62 | 
]
63 |  },
64 |  {
65 |   "cell_type": "code",
66 |   "execution_count": 9,
67 |   "metadata": {},
68 |   "outputs": [],
69 |   "source": [
70 |    "m.save(\"test.html\")"
71 |   ]
72 |  },
73 |  {
74 |   "cell_type": "code",
75 |   "execution_count": null,
76 |   "metadata": {},
77 |   "outputs": [],
78 |   "source": []
79 |  }
80 | ],
81 | "metadata": {
82 |  "kernelspec": {
83 |   "display_name": "Python 3",
84 |   "language": "python",
85 |   "name": "python3"
86 |  },
87 |  "language_info": {
88 |   "codemirror_mode": {
89 |    "name": "ipython",
90 |    "version": 3
91 |   },
92 |   "file_extension": ".py",
93 |   "mimetype": "text/x-python",
94 |   "name": "python",
95 |   "nbconvert_exporter": "python",
96 |   "pygments_lexer": "ipython3",
97 |   "version": "3.7.7"
98 |  }
99 | },
100 | "nbformat": 4,
101 | "nbformat_minor": 2
102 | }
103 | 
-------------------------------------------------------------------------------- /relationship_extractor_notes_transcriber.md: --------------------------------------------------------------------------------
1 | # Why?
2 | Need something to transcribe notes written by volunteers into machine-readable fields like `is_foreign` (set to `True` or `False`).
3 | 
4 | 
5 | # What works for relationship extraction API
6 | 
7 | ## Relationship
8 | What works
9 | 
10 | ```
11 | "Family of P13"
12 | "Son of P12 and Daughter of P12"
13 | "Friend of P12, P12, 14 and son of P13"
14 | ```
15 | 
16 | What doesn't work: relationships without the word `of`
17 | ```
18 | "Friend: P12"
19 | "Friends with P12"
20 | "Wife of p12" #Expected P12 (throws error with small p)
21 | ```
22 | 
23 | ## Travel Places
24 | What works
25 | ```
26 | Travelled from Wuhan
27 | Travel history Italy, Pune, Mumbai
28 | Travelled from South Korea via Mumbai
29 | ```
30 | 
31 | ## Nationality
32 | What works
33 | ```
34 | Indian student studying in wuhan
35 | Italian tourist
36 | ```
37 | What doesn't work
38 | ```
39 | Indian travelling with Italian tourist
40 | # Will return Indian and Italian
41 | ```
42 | 
43 | ## API Details
44 | API URL: http://coronatravelhistory.pythonanywhere.com/
45 | 
46 | Example request:
47 | ```
48 | curl --header "Content-Type: application/json" --request POST --data '{"patients":[{"patientId":"1","notes":"Travelled from Italy"}]}' http://coronatravelhistory.pythonanywhere.com/
49 | ```
50 | 
51 | Python example request:
52 | 
53 | ```python
54 | import requests
55 | 
56 | headers = {
57 |     'Content-Type': 'application/json',
58 | }
59 | 
60 | data = '''{
61 |     "patients": [
62 |         {
63 |             "patientId": "1",
64 |             "notes": "Indian Student Travelled from Italy, Family Member of P13 Friend with P12"
65 |         }
66 |     ]
67 | }'''
68 | 
69 | response = requests.post('http://coronatravelhistory.pythonanywhere.com/', headers=headers, data=data)
70 | ```
71 | 
72 | Input Data Example/Input JSON Format:
73 | ```javascript
74 | {
75 |     "patients": [
76 |         {
77 |             "patientId": "1",
78 |             "notes": "Indian Student Travelled from Italy, Family Member of P13 Friend of P12"
79 |         }
80 |     ]
81 | }
82 | ```
83 | Returns/Output JSON Format:
84 | ```javascript
85 | {
86 |     "patients": [
87 |         {
88 |             "1": {
89 |                 "nationality": [
90 |                     "Indian"
91 |                 ],
92 |                 "place_attributes": [
93 |                     {
94 |                         "is_foreign": true,
95 |                         "place": "Italy"
96 |                     }
97 |                 ],
98 |                 "relationship": [
99 |                     {
100 |                         "link": "Family Member",
101 |                         "with": [
102 |                             "P13"
103 |                         ]
104 |                     },
105 |                     {
106 |                         "link": "Friend",
107 |                         "with": [
108 |                             "P12"
109 |                         ]
110 |                     }
111 |                 ],
112 |                 "travel": [
113 |                     "Italy"
114 |                 ]
115 |             }
116 |         }
117 |     ]
118 | }
119 | ```
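
## Parsing the Response

A short client-side sketch (illustrative, not part of the original notes): it assumes the service is reachable and returns the output format shown above; `payload` and `fields` are names chosen here for clarity.

```python
import requests

payload = {
    "patients": [
        {"patientId": "1", "notes": "Indian Student Travelled from Italy, Family Member of P13"}
    ]
}

# Passing `json=` serialises the dict and sets Content-Type: application/json.
response = requests.post("http://coronatravelhistory.pythonanywhere.com/", json=payload)
response.raise_for_status()

# Each entry in "patients" is a single-key dict keyed by the submitted patientId.
for patient in response.json()["patients"]:
    for patient_id, fields in patient.items():
        print(patient_id, fields["travel"], fields["place_attributes"])
```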
-------------------------------------------------------------------------------- /relationship_server.py: --------------------------------------------------------------------------------
1 | from flask import Flask, request, jsonify, abort
2 | import spacy
3 | from spacy.tokens import Span
4 | from spacy.tokens import Token
5 | import functools
6 | import re
7 | import json
8 | import urllib.request
9 | import logging
10 | 
11 | nlp = spacy.load("en_core_web_lg")
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
16 | def make_dict_lowercase(d):
17 |     """
18 |     Utility method to convert keys and values in a dictionary `d` to lowercase.
19 | 
20 |     Args:
21 |         `d` (:obj:`dict`): dictionary whose keys and values have to be converted into lowercase
22 | 
23 |     Returns:
24 |         `lower_case_dict` that is a copy of `d` but with the keys and values converted to lowercase
25 | 
26 |     """
27 |     lower_case_dict = dict()
28 |     for k in d.keys():
29 |         lower_case_dict[k.lower()] = d[k].lower()
30 |     return lower_case_dict
31 | 
32 | 
33 | def load_country_acronym_json(
34 |     download_url: str = "https://raw.githubusercontent.com/rohanrmallya/coronaIndia/master/data/countries_acronym_aliases_flattened.json",
35 | ) -> dict:
36 | 
37 |     """
38 |     Loading JSON that has alias / acronym to country name mapping.
39 | 
40 |     Args:
41 |         download_url (:obj:`str`, optional): The URL from where the .json containing the alias-to-country mapping can be fetched.
42 | 
43 |     Returns:
44 |         json converted to :obj:`dict` if the `download_url` could be fetched and read, an empty dict otherwise.
45 | 
46 |     """
47 | 
48 |     with urllib.request.urlopen(download_url) as url:
49 |         return json.loads(url.read().decode()) if url.getcode() == 200 else {}
50 | 
51 | 
52 | country_acronym_lookup = make_dict_lowercase(load_country_acronym_json())
53 | 
54 | 
55 | def acronym_to_country(acronym):
56 |     """
57 |     Retrieve country name from `acronym` using `country_acronym_lookup` as reference
58 | 
59 |     Args:
60 |         acronym (:obj:`str`): acronym for which a country has to be searched
61 | 
62 |     Returns:
63 |         str: the `country` mapped to `acronym` if such a mapping is found,
64 |             the `acronym` if no mapping is found
65 |     """
66 |     country = country_acronym_lookup.get(acronym.lower())
67 |     return country.title() if country is not None else acronym.title()
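
To make the lookup concrete, a small illustration (the mapped examples mirror the assertions in test_relationship_server.py; the fallback behaviour follows from the code above):

```python
# Mapped aliases (mirroring the test suite):
acronym_to_country("UK")       # -> "United Kingdom"
acronym_to_country("usa")      # -> "United States"   (lookup is case-insensitive)
acronym_to_country("Holland")  # -> "Netherlands"

# Unmapped input falls through and is returned title-cased:
acronym_to_country("pune")     # -> "Pune"
```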
68 | 
69 | 
70 | with urllib.request.urlopen(
71 |     "https://raw.githubusercontent.com/bhanuc/indian-list/master/state-city.json"
72 | ) as url:
73 |     state_city = json.loads(url.read().decode())
74 | 
75 | # Indian states and cities, used as the reference list for the `is_foreign` check.
76 | indian_places = ["India", "Mumbai"]
77 | for k, v in state_city.items():
78 |     indian_places.append(k)
79 |     indian_places = indian_places + v
80 | 
81 | indian_places = [ele.replace("*", "") for ele in indian_places]
82 | 
83 | 
84 | def get_travel_status(span):
85 |     if span.label_ == "GPE":
86 |         prev_token = span.doc[span.start - 1]
87 |         if prev_token.text in ("from", "through", "via", "Via"):
88 |             return "from"
89 |         elif prev_token.text in ("to", "and"):
90 |             return "to"
91 |     return None
92 | 
93 | 
94 | def get_nat(span):
95 |     if span.label_ == "NORP":
96 |         return span.text
97 | 
98 | 
99 | def get_rel(token):
100 |     if token.text == "of":
101 |         prev_token = token.doc[token.i - 1]
102 |         prev2 = None
103 |         if token.i > 2:
104 |             prev2 = token.doc[token.i - 2]
105 |             if prev2.text.lower() == "and" and str(token.doc[token.i - 3])[0] != "P":
106 |                 return f"{token.doc[token.i - 3]} {token.doc[token.i - 2]} {token.doc[token.i - 1]}"
107 |         if prev_token.text.lower() in ("members", "member"):
108 |             return "Family Member"
109 |         else:
110 |             return prev_token.text
111 | 
112 | 
113 | def extract_relationship(doc):
114 |     ids = []
115 |     output = []
116 |     for tok in doc:
117 |         if tok._.relationship:
118 |             ids.append(tok.i + 1)
119 |     ids.append(len(doc))
120 |     for i in range(len(ids) - 1):
121 |         w = re.findall("P[0-9]+", str(doc[ids[i] : ids[i + 1]]))
122 |         output.append({"link": doc[ids[i] - 1]._.relationship, "with": w})
123 |     return output
124 | 
125 | 
126 | def extract_travel_place(doc):
127 |     travel = []
128 |     for ent in doc.ents:
129 |         if ent._.travel_status:
130 |             travel.append(ent.text)
131 |     return list(map(acronym_to_country, travel))
132 | 
133 | 
134 | def extract_nationality(doc):
135 |     nat = []
136 |     for ent in doc.ents:
137 |         if ent._.nationality:
138 |             nat.append(ent._.nationality)
139 |     return nat
140 | 
141 | 
142 | def extract_foreign(doc):
143 |     is_foreign = []
144 |     for ent in doc.ents:
145 |         if ent._.travel_status:
146 |             is_foreign.append(
147 |                 {
148 |                     "place": acronym_to_country(ent.text),
149 |                     "is_foreign": ent.text not in indian_places,
150 |                 }
151 |             )
152 |     return is_foreign
153 | 
154 | 
155 | Span.set_extension("travel_status", getter=get_travel_status, force=True)
156 | Span.set_extension("nationality", getter=get_nat, force=True)
157 | Token.set_extension("relationship", getter=get_rel, force=True)
158 | 
159 | app = Flask(__name__)
160 | 
161 | default_result = {
162 |     "nationality": [],
163 |     "travel": [],
164 |     "relationship": [],
165 |     "place_attributes": [],
166 | }
167 | 
168 | 
169 | @functools.lru_cache(30000)
170 | def record_processor(sent):
171 |     logger.info(f"Travel Input: {sent}")
172 |     if not sent:
173 |         return default_result
174 |     s = re.sub(r"[^\w\s]", " ", sent)
175 |     doc = nlp(s)
176 |     return {
177 |         "nationality": extract_nationality(doc),
178 |         "travel": extract_travel_place(doc),
179 |         "relationship": extract_relationship(doc),
180 |         "place_attributes": extract_foreign(doc),
181 |     }
182 | 
183 | 
184 | def process_records(records):
185 |     history = []
186 |     for r in records["patients"]:
187 |         if "notes" not in r:
188 |             history.append(default_result)
189 |             logger.info("ಥ_ಥ Missing Notes")
190 |         else:
191 |             result = record_processor(r["notes"])
192 |             history.append({r["patientId"]: result})
193 |             logger.info(f"Travel Output : {r['patientId']}: {result}")
194 | 
195 |     return {"patients": history}
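
`record_processor` is the seam the tests exercise: it strips punctuation, runs the spaCy pipeline once, and fans the doc out to the four extractors. A minimal smoke check (a sketch; it assumes the module is importable with `en_core_web_lg` installed, and exact entity hits depend on the model version):

```python
from relationship_server import record_processor

result = record_processor(
    "Indian Student Travelled from Italy, Family Member of P13 Friend with P12"
)

# Expected shape, mirroring test_process_records in test_relationship_server.py:
print(result["nationality"])       # ["Indian"]
print(result["travel"])            # ["Italy"]
print(result["relationship"])      # [{"link": "Family Member", "with": ["P13", "P12"]}]
print(result["place_attributes"])  # [{"place": "Italy", "is_foreign": True}]
```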
196 | 
197 | 
198 | @app.route("/", methods=["POST"])
199 | def single():
200 |     try:
201 |         req_data = request.get_json()
202 |         results = process_records(req_data)
203 |     except TypeError:
204 |         logger.info("ಠ~ಠ TypeError Aborting")
205 |         logger.info(f"Error Data : {req_data}")
206 |         abort(400)
207 |     except KeyError:
208 |         logger.info("ಠ╭╮ಠ KeyError Aborting")
209 |         logger.info(f"Error Data : {req_data}")
210 |         return jsonify(error="Not the correct request format!")
211 |     return results
212 | 
213 | 
214 | # if __name__ == "__main__":
215 | #     app.run()
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | python-geohash
2 | geopandas
3 | geopatra
4 | openlocationcode
5 | osm2geojson
6 | bqplot
7 | ipympl
8 | ipyvolume
9 | scipy
10 | 
-------------------------------------------------------------------------------- /test_relationship_server.py: --------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from relationship_server import process_records, record_processor
4 | 
5 | 
6 | def test_record_processor():
7 |     """
8 |     Testing record_processor: Should return the default empty result when there is no input.
9 |     """
10 |     assert record_processor(" ") == {
11 |         "nationality": [],
12 |         "travel": [],
13 |         "relationship": [],
14 |         "place_attributes": [],
15 |     }
16 |     assert record_processor("") == {
17 |         "nationality": [],
18 |         "travel": [],
19 |         "relationship": [],
20 |         "place_attributes": [],
21 |     }
22 | 
23 | 
24 | def test_nationality():
25 |     """Checks Nationality
26 |     """
27 |     assert record_processor("Indian Student Studying in Italy") == {
28 |         "nationality": ["Indian"],
29 |         "travel": [],
30 |         "relationship": [],
31 |         "place_attributes": [],
32 |     }
33 | 
34 | 
35 | def test_travel():
36 |     """Checks if the Travel place is returned with appropriate `place_attributes`
37 |     """
38 |     assert record_processor("Traveled from Italy") == {
39 |         "nationality": [],
40 |         "travel": ["Italy"],
41 |         "relationship": [],
42 |         "place_attributes": [{"place": "Italy", "is_foreign": True}],
43 |     }
44 |     assert record_processor("Traveled to Pune") == {
45 |         "nationality": [],
46 |         "travel": ["Pune"],
47 |         "relationship": [],
48 |         "place_attributes": [{"place": "Pune", "is_foreign": False}],
49 |     }
50 |     assert record_processor("Traveled from United Kingdom") == {
51 |         "nationality": [],
52 |         "travel": ["United Kingdom"],
53 |         "relationship": [],
54 |         "place_attributes": [{"place": "United Kingdom", "is_foreign": True}],
55 |     }
56 | 
57 | 
58 | def test_travel_acronyms():
59 |     """Checks for acronym to country name mapping
60 |     """
61 |     assert record_processor("Traveled to UK") == {
62 |         "nationality": [],
63 |         "travel": ["United Kingdom"],
64 |         "relationship": [],
65 |         "place_attributes": [{"place": "United Kingdom", "is_foreign": True}],
66 |     }
67 |     assert record_processor("Traveled from UK") == {
68 |         "nationality": [],
69 |         "travel": ["United Kingdom"],
70 |         "relationship": [],
71 |         "place_attributes": [{"place": "United Kingdom", "is_foreign": True}],
72 |     }
73 |     assert record_processor("Traveled from US") == {
74 |         "nationality": [],
75 |         "travel": ["United States"],
76 |         "relationship": [],
77 |         "place_attributes": [{"place": "United States", "is_foreign": True}],
78 |     }
79 | 
assert record_processor("Traveled from USA") == { 80 | "nationality": [], 81 | "travel": ["United States"], 82 | "relationship": [], 83 | "place_attributes": [{"place": "United States", "is_foreign": True}], 84 | } 85 | assert record_processor("Traveled to UK and Japan") == { 86 | "nationality": [], 87 | "travel": ["United Kingdom", "Japan"], 88 | "relationship": [], 89 | "place_attributes": [ 90 | {"place": "United Kingdom", "is_foreign": True}, 91 | {"place": "Japan", "is_foreign": True}, 92 | ], 93 | } 94 | 95 | 96 | def test_travel_aliases(): 97 | """Checks for Aliases to country name mapping 98 | """ 99 | assert record_processor("Traveled from Republic of South Korea") == { 100 | "nationality": [], 101 | "travel": ["South Korea"], 102 | "relationship": [], 103 | "place_attributes": [{"place": "South Korea", "is_foreign": True},], 104 | } # Republic is picked up as a relationship 105 | 106 | assert record_processor("Traveled from Aphsny Axwynthkharra") == { 107 | "nationality": [], 108 | "travel": ["Abkhazia"], 109 | "relationship": [], 110 | "place_attributes": [{"place": "Abkhazia", "is_foreign": True},], 111 | } 112 | 113 | assert record_processor("Traveled from Holland") == { 114 | "nationality": [], 115 | "travel": ["Netherlands"], 116 | "relationship": [], 117 | "place_attributes": [{"place": "Netherlands", "is_foreign": True},], 118 | } 119 | 120 | 121 | def test_relationship(): 122 | """Relationship mapping 123 | """ 124 | assert record_processor("Son of P13") == { 125 | "nationality": [], 126 | "travel": [], 127 | "relationship": [{"link": "Son", "with": ["P13"]}], 128 | "place_attributes": [], 129 | } 130 | # Same link with Multiple people 131 | assert record_processor("Son of P13 and P14") == { 132 | "nationality": [], 133 | "travel": [], 134 | "relationship": [{"link": "Son", "with": ["P13", "P14"]}], 135 | "place_attributes": [], 136 | } 137 | # Multiple Links 138 | assert record_processor("Son of P13 and P14 friend of P16, P17, P18") == { 139 | "nationality": [], 140 | "travel": [], 141 | "relationship": [ 142 | {"link": "Son", "with": ["P13", "P14"]}, 143 | {"link": "friend", "with": ["P16", "P17", "P18"]}, 144 | ], 145 | "place_attributes": [], 146 | } 147 | 148 | 149 | def test_process_records(): 150 | """ 151 | """ 152 | records = { 153 | "patients": [ 154 | { 155 | "patientId": "1", 156 | "notes": "Indian Student Travelled from Italy, Family Member of P13 Friend with P12", 157 | } 158 | ] 159 | } 160 | assert process_records(records) == { 161 | "patients": [ 162 | { 163 | "1": { 164 | "nationality": ["Indian"], 165 | "travel": ["Italy"], 166 | "relationship": [{"link": "Family Member", "with": ["P13", "P12"]}], 167 | "place_attributes": [{"place": "Italy", "is_foreign": True}], 168 | } 169 | } 170 | ] 171 | } 172 | 173 | 174 | def test_input_error_missing_notes(): 175 | """process_records should return default output if notes are missing. 176 | """ 177 | records = {"patients": [{"patientId": "1"}]} 178 | assert process_records(records) == { 179 | "patients": [ 180 | { 181 | "nationality": [], 182 | "travel": [], 183 | "relationship": [], 184 | "place_attributes": [], 185 | } 186 | ] 187 | } 188 | 189 | 190 | def test_input_error_missing_patientId(): 191 | """process_records should throw KeyError if patientId is missing. 
192 | """ 193 | records = { 194 | "patients": [ 195 | { 196 | "notes": "Indian Student Travelled from Italy, Family Member of P3 Friend with P2" 197 | } 198 | ] 199 | } 200 | with pytest.raises(Exception) as excinfo: 201 | process_records(records) 202 | assert type(excinfo.value) == KeyError 203 | assert excinfo.value.args[0] == "patientId" 204 | --------------------------------------------------------------------------------