├── automation ├── .tmp │ └── .gitignore ├── ocr │ ├── ocrconfig.meta │ ├── sk_districts.meta │ ├── nl_districts.meta │ ├── mz_districts.meta │ ├── hp_districts.meta │ ├── ocrconfig.meta.orig │ ├── kl_districts.meta │ ├── ut_districts.meta │ ├── mn_districts.meta │ ├── ap_districts.meta │ ├── ml_districts.meta │ ├── jk_districts.meta │ ├── hr_districts.meta │ ├── jh_districts.meta │ ├── pb_districts.meta │ ├── ar_districts.meta │ ├── rj_districts.meta │ ├── mh_districts.meta │ ├── ka_districts.meta │ ├── as_districts.meta │ ├── tn_districts.meta │ ├── mp_districts.meta │ ├── br_districts.meta │ ├── ct_districts.meta │ ├── ocr_vision.py │ ├── tg_districts.meta │ ├── up_districts.meta │ ├── ocr.sh │ └── googlevision.py ├── biharIndividual.py ├── misc │ ├── kabulletinextractor.py │ ├── tnfilegeneration.py │ └── kapatients.py ├── output2.out ├── automation.meta ├── nameMapping.meta ├── kaautomation.py ├── deltaCalculator.py ├── x └── automation.py ├── detailedflow.png ├── extract.sh ├── requirements.txt ├── .gitignore ├── extract.meta ├── extract.meta.bk ├── README.md └── extract.py /automation/.tmp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | */ 3 | !.gitignore -------------------------------------------------------------------------------- /detailedflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bee-rickey/webScraper/HEAD/detailedflow.png -------------------------------------------------------------------------------- /automation/ocr/ocrconfig.meta: -------------------------------------------------------------------------------- 1 | startingText:Araria 2 | enableTranslation:True 3 | translationFile:br_districts.meta 4 | yInterval:0 5 | xInterval:0 6 | -------------------------------------------------------------------------------- /automation/ocr/sk_districts.meta: 
-------------------------------------------------------------------------------- 1 | East Sikkim, East Sikkim 2 | North Sikkim, North Sikkim 3 | South Sikkim, South Sikkim 4 | West Sikkim, West Sikkim 5 | -------------------------------------------------------------------------------- /extract.sh: -------------------------------------------------------------------------------- 1 | today=`date +"%d_%b_%Y"` 2 | mkdir data/$today 3 | curl https://covid19.nagaland.gov.in/ > x.html 4 | python3 extract.py $today 5 | #git pull 6 | #git add --all 7 | #git commit -m "Folder creation" 8 | #git push --all 9 | -------------------------------------------------------------------------------- /automation/ocr/nl_districts.meta: -------------------------------------------------------------------------------- 1 | Dimapur,Dimapur 2 | Kiphire,Kiphire 3 | Kohima,Kohima 4 | Longleng,Longleng 5 | Mokokchung,Mokokchung 6 | Mon,Mon 7 | Peren,Peren 8 | Phek,Phek 9 | Tuensang,Tuensang 10 | Wokha,Wokha 11 | Zunheboto,Zunheboto 12 | -------------------------------------------------------------------------------- /automation/ocr/mz_districts.meta: -------------------------------------------------------------------------------- 1 | Aizawl,Aizawl 2 | Champhai,Champhai 3 | Kolasib,Kolasib 4 | Lawngtlai,Lawngtlai 5 | Lunglei,Lunglei 6 | Mamit,Mamit 7 | Saiha,Saiha 8 | Serchhip,Serchhip 9 | Saitual,Saitual 10 | Hnahthial,Hnahthial 11 | Khawzawl,Khawzawl 12 | -------------------------------------------------------------------------------- /automation/ocr/hp_districts.meta: -------------------------------------------------------------------------------- 1 | Bilaspur,Bilaspur 2 | Chamba,Chamba 3 | Hamirpur,Hamirpur 4 | Kangra,Kangra 5 | Kinnaur,Kinnaur 6 | Kullu,Kullu 7 | Lahaul and Spiti,Lahaul and Spiti 8 | Mandi,Mandi 9 | Shimla,Shimla 10 | Sirmaur,Sirmaur 11 | Solan,Solan 12 | Una,Una 13 | -------------------------------------------------------------------------------- 
/automation/ocr/ocrconfig.meta.orig: -------------------------------------------------------------------------------- 1 | startingText:@@startingText@@ 2 | enableTranslation:@@enableTranslation@@ 3 | translationFile:@@statename@@_districts.meta 4 | yInterval:@@yInterval@@ 5 | xInterval:@@xInterval@@ 6 | houghTransform:@@houghTransform@@ 7 | configMinLineLength:@@configMinLineLength@@ 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.9.3 2 | camelot-py==0.8.2 3 | google-cloud-vision==2.0.0 4 | html5lib==1.1 5 | matplotlib==3.3.2 6 | pandas==1.1.3 7 | pdftotext==2.1.5 8 | Pillow==8.2.0 9 | requests==2.24.0 10 | opencv-python==4.4.0.40 11 | fuzzywuzzy==0.18.0 12 | python-Levenshtein==0.12.0 13 | -------------------------------------------------------------------------------- /automation/ocr/kl_districts.meta: -------------------------------------------------------------------------------- 1 | Alappuzha,Alappuzha 2 | Ernakulam,Ernakulam 3 | Idukki,Idukki 4 | Kannur,Kannur 5 | Kasaragod,Kasaragod 6 | Kollam,Kollam 7 | Kottayam,Kottayam 8 | Kozhikode,Kozhikode 9 | Malappuram,Malappuram 10 | Palakkad,Palakkad 11 | Pathanamthitta,Pathanamthitta 12 | Thiruvananthapuram,Thiruvananthapuram 13 | Thrissur,Thrissur 14 | Wayanad,Wayanad 15 | -------------------------------------------------------------------------------- /automation/ocr/ut_districts.meta: -------------------------------------------------------------------------------- 1 | Almora,Almora 2 | Bageshwar,Bageshwar 3 | Chamoli,Chamoli 4 | Champawat,Champawat 5 | Dehradun,Dehradun 6 | Haridwar,Haridwar 7 | Nainital,Nainital 8 | Pauri Garhwal,Pauri Garhwal 9 | Pithoragarh,Pithoragarh 10 | Rudraprayag,Rudraprayag 11 | Tehri Garhwal,Tehri Garhwal 12 | Udham Singh Nagar,Udham Singh Nagar 13 | Uttarkashi,Uttarkashi 14 | 
-------------------------------------------------------------------------------- /automation/ocr/mn_districts.meta: -------------------------------------------------------------------------------- 1 | Bishnupur,Bishnupur 2 | Chandel,Chandel 3 | Churachandpur,Churachandpur 4 | Imphal East,Imphal East 5 | Imphal West,Imphal West 6 | Jiribam,Jiribam 7 | Kakching,Kakching 8 | Kamjong,Kamjong 9 | Kangpokpi,Kangpokpi 10 | Noney,Noney 11 | Pherzawl,Pherzawl 12 | Senapati,Senapati 13 | Tamenglong,Tamenglong 14 | Tengnoupal,Tengnoupal 15 | Thoubal,Thoubal 16 | Ukhrul,Ukhrul 17 | -------------------------------------------------------------------------------- /automation/ocr/ap_districts.meta: -------------------------------------------------------------------------------- 1 | Anantapur,Anantapur 2 | Chittoor,Chittoor 3 | East Godavari,East Godavari 4 | Guntur,Guntur 5 | Krishna,Krishna 6 | Kurnool,Kurnool 7 | Prakasam,Prakasam 8 | S.P.S. Nellore,S.P.S. Nellore 9 | Srikakulam,Srikakulam 10 | Visakhapatnam,Visakhapatnam 11 | Vizianagaram,Vizianagaram 12 | West Godavari,West Godavari 13 | Y.S.R. Kadapa,Y.S.R. Kadapa 14 | YSR Kadapa,Y.S.R. 
Kadapa 15 | -------------------------------------------------------------------------------- /automation/ocr/ml_districts.meta: -------------------------------------------------------------------------------- 1 | East Garo Hills,East Garo Hills 2 | East Jaintia Hills,East Jaintia Hills 3 | East Khasi Hills,East Khasi Hills 4 | North Garo Hills,North Garo Hills 5 | Ribhoi,Ribhoi 6 | South Garo Hills,South Garo Hills 7 | South West Garo Hills,South West Garo Hills 8 | South West Khasi Hills,South West Khasi Hills 9 | West Garo Hills,West Garo Hills 10 | West Jaintia Hills,West Jaintia Hills 11 | West Khasi Hills,West Khasi Hills 12 | -------------------------------------------------------------------------------- /automation/ocr/jk_districts.meta: -------------------------------------------------------------------------------- 1 | Anantnag,Anantnag 2 | Budgam,Budgam 3 | Bandipora,Bandipora 4 | Baramulla,Baramulla 5 | Doda,Doda 6 | Ganderbal,Ganderbal 7 | Jammu,Jammu 8 | Kathua,Kathua 9 | Kishtwar,Kishtwar 10 | Kulgam,Kulgam 11 | Kupwara,Kupwara 12 | Mirpur,Mirpur 13 | Muzaffarabad,Muzaffarabad 14 | Pulwama,Pulwama 15 | Punch,Punch 16 | Rajouri,Rajouri 17 | Ramban,Ramban 18 | Reasi,Reasi 19 | Samba,Samba 20 | Shopiyan,Shopiyan 21 | Srinagar,Srinagar 22 | Udhampur,Udhampur 23 | -------------------------------------------------------------------------------- /automation/ocr/hr_districts.meta: -------------------------------------------------------------------------------- 1 | Ambala,Ambala 2 | Bhiwani,Bhiwani 3 | Charkhi Dadri,Charkhi Dadri 4 | Faridabad,Faridabad 5 | Fatehabad,Fatehabad 6 | Gurugram,Gurugram 7 | Hisar,Hisar 8 | Jhajjar,Jhajjar 9 | Jind,Jind 10 | Kaithal,Kaithal 11 | Karnal,Karnal 12 | Kurukshetra,Kurukshetra 13 | Mahendragarh,Mahendragarh 14 | Nuh,Nuh 15 | Palwal,Palwal 16 | Panchkula,Panchkula 17 | Panipat,Panipat 18 | Rewari,Rewari 19 | Rohtak,Rohtak 20 | Sirsa,Sirsa 21 | Sonipat,Sonipat 22 | Yamunanagar,Yamunanagar 23 | 
-------------------------------------------------------------------------------- /automation/biharIndividual.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | testingNumbersFile = open("ocr/output.txt", "r") 4 | 5 | for index, line in enumerate(testingNumbersFile): 6 | if index == 0: 7 | continue 8 | outputString = "" 9 | linesArray = line.split('|')[0].split(',') 10 | 11 | gender = "F" if linesArray[1].strip() == "FEMALE" else "M" 12 | print("{}, {}, {}, {}, {}, {}, {}, {}".format(linesArray[2].strip(), gender, linesArray[3].strip().title(), linesArray[4].strip().title(), 'Bihar', 'BR', 1, 'Hospitalized')) 13 | -------------------------------------------------------------------------------- /automation/ocr/jh_districts.meta: -------------------------------------------------------------------------------- 1 | Bokaro,Bokaro 2 | Chatra,Chatra 3 | Deoghar,Deoghar 4 | Dhanbad,Dhanbad 5 | Dumka,Dumka 6 | Garhwa,Garhwa 7 | Giridih,Giridih 8 | Godda,Godda 9 | Gumla,Gumla 10 | Hazaribagh,Hazaribagh 11 | Jamtara,Jamtara 12 | Khunti,Khunti 13 | Koderma,Koderma 14 | Latehar,Latehar 15 | Lohardaga,Lohardaga 16 | Pakur,Pakur 17 | Palamu,Palamu 18 | West Singhbhum,West Singhbhum 19 | East Singhbhum,East Singhbhum 20 | Ramgarh,Ramgarh 21 | Ranchi,Ranchi 22 | Sahibganj,Sahibganj 23 | Saraikela-Kharsawan,Saraikela-Kharsawan 24 | Simdega,Simdega 25 | West, West Singhbhum 26 | -------------------------------------------------------------------------------- /automation/ocr/pb_districts.meta: -------------------------------------------------------------------------------- 1 | Amritsar,Amritsar 2 | Barnala,Barnala 3 | Bathinda,Bathinda 4 | Faridkot,Faridkot 5 | Fatehgarh Sahib,Fatehgarh Sahib 6 | Fazilka,Fazilka 7 | Ferozepur,Ferozepur 8 | Gurdaspur,Gurdaspur 9 | Hoshiarpur,Hoshiarpur 10 | Jalandhar,Jalandhar 11 | Kapurthala,Kapurthala 12 | Ludhiana,Ludhiana 13 | Mansa,Mansa 14 | Moga,Moga 15 | Pathankot,Pathankot 16 | 
Patiala,Patiala 17 | Rupnagar,Rupnagar 18 | S.A.S. Nagar,S.A.S. Nagar 19 | Sangrur,Sangrur 20 | Shahid Bhagat Singh Nagar,Shahid Bhagat Singh Nagar 21 | Sri Muktsar Sahib,Sri Muktsar Sahib 22 | Tarn Taran,Tarn Taran 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | x.html 2 | .extract.meta.swp 3 | automation/orsite.csv 4 | *.swp 5 | *.swp 6 | automation/deltaCalculator.log 7 | automation/automated.txt 8 | automation/__pycache__/deltaCalculator.cpython-37.pyc 9 | automation/br.txt 10 | .DS_Store 11 | automation/HR.txt 12 | automation/hr.pdf.txt 13 | automation/hr.pdf 14 | automation/hr.csv 15 | *.txt 16 | *.csv 17 | *.pdf 18 | *.jpg 19 | *.png 20 | automation/ocr/ocrconfig.meta 21 | automation/ocr/ocrconfig.meta 22 | automation/deltaCalculator.log 23 | automation/ocr/ocrconfig.meta 24 | automation/ocr/ocrconfig.meta 25 | 26 | venv/ 27 | __pycache__ 28 | !requirements.txt 29 | 30 | .vscode 31 | automation/ocr/ocrconfig.meta 32 | -------------------------------------------------------------------------------- /automation/ocr/ar_districts.meta: -------------------------------------------------------------------------------- 1 | Anjaw,Anjaw 2 | Changlang,Changlang 3 | East Kameng,East Kameng 4 | East Siang,East Siang 5 | Kamle,Kamle 6 | Kra Daadi,Kra Daadi 7 | Kurung Kumey,Kurung Kumey 8 | Lepa Rada,Lepa Rada 9 | Lohit,Lohit 10 | Longding,Longding 11 | Lower Dibang Valley,Lower Dibang Valley 12 | Lower Siang,Lower Siang 13 | Lower Subansiri,Lower Subansiri 14 | Namsai,Namsai 15 | Pakke Kessang,Pakke Kessang 16 | Papum Pare,Papum Pare 17 | Shi Yomi,Shi Yomi 18 | Siang,Siang 19 | Tawang,Tawang 20 | Tirap,Tirap 21 | Upper Dibang Valley,Upper Dibang Valley 22 | Upper Siang,Upper Siang 23 | Upper Subansiri,Upper Subansiri 24 | West Kameng,West Kameng 25 | West Siang,West Siang 26 | 
-------------------------------------------------------------------------------- /automation/misc/kabulletinextractor.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import re 4 | from bs4 import BeautifulSoup 5 | 6 | 7 | 8 | response = requests.request("GET", "https://covid19.karnataka.gov.in/new-page/Health%20Department%20Bulletin/en") 9 | soup = BeautifulSoup(response.content, 'html5lib') 10 | 11 | rows = soup.find_all("td") 12 | 13 | for row in rows: 14 | aTag = row.find("a") 15 | if aTag is not None: 16 | if 'karnataka.gov' in aTag['href']: 17 | fileNameArray = aTag['href'].split('/') 18 | fileName = fileNameArray[len(fileNameArray) - 1] 19 | r = requests.get(aTag['href'], allow_redirects=True) 20 | open("KABulletin/" + fileName, 'wb').write(r.content) 21 | -------------------------------------------------------------------------------- /automation/ocr/rj_districts.meta: -------------------------------------------------------------------------------- 1 | Ajmer,Ajmer 2 | Alwar,Alwar 3 | Banswara,Banswara 4 | Baran,Baran 5 | Barmer,Barmer 6 | Bharatpur,Bharatpur 7 | Bhilwara,Bhilwara 8 | Bikaner,Bikaner 9 | Bundi,Bundi 10 | Chittorgarh,Chittorgarh 11 | Churu,Churu 12 | Dausa,Dausa 13 | Dholpur,Dholpur 14 | Dungarpur,Dungarpur 15 | Ganganagar,Ganganagar 16 | Hanumangarh,Hanumangarh 17 | Jaipur,Jaipur 18 | Jaisalmer,Jaisalmer 19 | Jalore,Jalore 20 | Jhalawar,Jhalawar 21 | Jhunjhunu,Jhunjhunu 22 | Jodhpur,Jodhpur 23 | Karauli,Karauli 24 | Kota,Kota 25 | Nagaur,Nagaur 26 | Pali,Pali 27 | Pratapgarh,Pratapgarh 28 | Rajsamand,Rajsamand 29 | Sawai Madhopur,Sawai Madhopur 30 | Sikar,Sikar 31 | Sirohi,Sirohi 32 | Tonk,Tonk 33 | Udaipur,Udaipur 34 | -------------------------------------------------------------------------------- /automation/ocr/mh_districts.meta: -------------------------------------------------------------------------------- 1 | Ahmednagar,Ahmednagar 2 | Akola,Akola 3 | 
Amravati,Amravati 4 | Aurangabad,Aurangabad 5 | Bhandara,Bhandara 6 | Beed,Beed 7 | Buldhana,Buldhana 8 | Chandrapur,Chandrapur 9 | Dhule,Dhule 10 | Gadchiroli,Gadchiroli 11 | Gondia,Gondia 12 | Hingoli,Hingoli 13 | Jalgaon,Jalgaon 14 | Jalna,Jalna 15 | Kolhapur,Kolhapur 16 | Latur,Latur 17 | Mumbai,Mumbai 18 | Mumbai Suburban,Mumbai Suburban 19 | Nagpur,Nagpur 20 | Nanded,Nanded 21 | Nandurbar,Nandurbar 22 | Nashik,Nashik 23 | Osmanabad,Osmanabad 24 | Palghar,Palghar 25 | Parbhani,Parbhani 26 | Pune,Pune 27 | Raigad,Raigad 28 | Ratnagiri,Ratnagiri 29 | Sangli,Sangli 30 | Satara,Satara 31 | Sindhudurg,Sindhudurg 32 | Solapur,Solapur 33 | Thane,Thane 34 | Wardha,Wardha 35 | Washim,Washim 36 | Yavatmal,Yavatmal 37 | -------------------------------------------------------------------------------- /automation/ocr/ka_districts.meta: -------------------------------------------------------------------------------- 1 | Others,Others 2 | Bagalakote,Bagalakote 3 | Ballari,Ballari 4 | Belagavi,Belagavi 5 | Bengaluru Urban,Bengaluru Urban 6 | Bengaluru Rural,Bengaluru Rural 7 | Bidar,Bidar 8 | Chamarajanagara,Chamarajanagara 9 | Chikkaballapura,Chikkaballapura 10 | Chikkamagaluru,Chikkamagaluru 11 | Chitradurga,Chitradurga 12 | Dakshina Kannada,Dakshina Kannada 13 | Davanagere,Davanagere 14 | Dharwad,Dharwad 15 | Gadag,Gadag 16 | Hassan,Hassan 17 | Haveri,Haveri 18 | Kalaburagi,Kalaburagi 19 | Kodagu,Kodagu 20 | Kolar,Kolar 21 | Koppal,Koppal 22 | Mandya,Mandya 23 | Mysuru,Mysuru 24 | Raichur,Raichur 25 | Ramanagara,Ramanagara 26 | Shivamogga,Shivamogga 27 | Tumakuru,Tumakuru 28 | Udupi,Udupi 29 | Uttara Kannada,Uttara Kannada 30 | Vijayapura,Vijayapura 31 | Yadgir,Yadgir 32 | -------------------------------------------------------------------------------- /automation/ocr/as_districts.meta: -------------------------------------------------------------------------------- 1 | Baksa,Baksa 2 | Barpeta,Barpeta 3 | Biswanath,Biswanath 4 | Bongaigaon,Bongaigaon 5 | Cachar,Cachar 6 | 
Charaideo,Charaideo 7 | Chirang,Chirang 8 | Darrang,Darrang 9 | Dhemaji,Dhemaji 10 | Dhubri,Dhubri 11 | Dibrugarh,Dibrugarh 12 | Dima Hasao,Dima Hasao 13 | Goalpara,Goalpara 14 | Golaghat,Golaghat 15 | Hailakandi,Hailakandi 16 | Hojai,Hojai 17 | Jorhat,Jorhat 18 | Kamrup,Kamrup 19 | Kamrup Metropolitan,Kamrup Metropolitan 20 | Karbi Anglong,Karbi Anglong 21 | Karimganj,Karimganj 22 | Kokrajhar,Kokrajhar 23 | Lakhimpur,Lakhimpur 24 | Majuli,Majuli 25 | Morigaon,Morigaon 26 | Nagaon,Nagaon 27 | Nalbari,Nalbari 28 | Sivasagar,Sivasagar 29 | Sonitpur,Sonitpur 30 | South Salmara Mankachar,South Salmara Mankachar 31 | Tinsukia,Tinsukia 32 | Udalguri,Udalguri 33 | West Karbi Anglong,West Karbi Anglong 34 | -------------------------------------------------------------------------------- /automation/output2.out: -------------------------------------------------------------------------------- 1 | 16-03-2021, India, "", 1798467, 25709257, 5366679, 33886, 25709257, 6161488, 13993527, 11713149, 2581, 2557019, 29313725 2 | 16-03-2021, Andaman and Nicobar Islands, "", 328, 11950, 700, 7, 11950, 2876, 7391, 4559, 0, 0, 14826 3 | 16-03-2021, Andaman and Nicobar Islands, "North and Middle Andaman", 79, 2245, 300, 3, 2245, 929, 1086, 1159, 0, 0, 3174 4 | 16-03-2021, Andaman and Nicobar Islands, "South Andaman", 178, 8879, 300, 3, 8879, 1650, 5874, 3005, 0, 0, 10529 5 | 16-03-2021, Andaman and Nicobar Islands, "Nicobar", 71, 826, 100, 1, 826, 297, 431, 395, 0, 0, 1123 6 | 16-03-2021, Andhra Pradesh, "", 67824, 976078, 331083, 2204, 976078, 257997, 501439, 474547, 92, 145251, 1088824 7 | 16-03-2021, Andhra Pradesh, "Krishna", 5537, 83524, 28619, 191, 83524, 22711, 42137, 41385, 2, 23600, 82635 8 | 16-03-2021, Andhra Pradesh, "Guntur", 5117, 86069, 34150, 227, 86069, 20637, 46752, 39311, 6, 23124, 83582 9 | -------------------------------------------------------------------------------- /automation/ocr/tn_districts.meta: 
-------------------------------------------------------------------------------- 1 | Ariyalur,Ariyalur 2 | Chennai,Chennai 3 | Coimbatore,Coimbatore 4 | Cuddalore,Cuddalore 5 | Dharmapuri,Dharmapuri 6 | Dindigul,Dindigul 7 | Erode,Erode 8 | Kancheepuram,Kancheepuram 9 | Kanyakumari,Kanyakumari 10 | Karur,Karur 11 | Krishnagiri,Krishnagiri 12 | Madurai,Madurai 13 | Nagapattinam,Nagapattinam 14 | Namakkal,Namakkal 15 | Perambalur,Perambalur 16 | Pudukkottai,Pudukkottai 17 | Ramanathapuram,Ramanathapuram 18 | Salem,Salem 19 | Sivaganga,Sivaganga 20 | Thanjavur,Thanjavur 21 | Nilgiris,Nilgiris 22 | Theni,Theni 23 | Thiruvallur,Thiruvallur 24 | Thiruvarur,Thiruvarur 25 | Thoothukkudi,Thoothukkudi 26 | Tiruchirappalli,Tiruchirappalli 27 | Tirunelveli,Tirunelveli 28 | Tiruppur,Tiruppur 29 | Tiruvannamalai,Tiruvannamalai 30 | Vellore,Vellore 31 | Viluppuram,Viluppuram 32 | Virudhunagar,Virudhunagar 33 | Tenkasi,Tenkasi 34 | Chengalpattu,Chengalpattu 35 | Ranipet,Ranipet 36 | Kallakurichi,Kallakurichi 37 | Tirupathur,Tirupathur 38 | -------------------------------------------------------------------------------- /automation/ocr/mp_districts.meta: -------------------------------------------------------------------------------- 1 | Agar Malwa,Agar Malwa 2 | Alirajpur,Alirajpur 3 | Anuppur,Anuppur 4 | Ashoknagar,Ashoknagar 5 | Balaghat,Balaghat 6 | Barwani,Barwani 7 | Betul,Betul 8 | Bhind,Bhind 9 | Bhopal,Bhopal 10 | Burhanpur,Burhanpur 11 | Chhatarpur,Chhatarpur 12 | Chhindwara,Chhindwara 13 | Damoh,Damoh 14 | Datia,Datia 15 | Dewas,Dewas 16 | Dhar,Dhar 17 | Dindori,Dindori 18 | Khandwa,Khandwa 19 | Guna,Guna 20 | Gwalior,Gwalior 21 | Harda,Harda 22 | Hoshangabad,Hoshangabad 23 | Indore,Indore 24 | Jabalpur,Jabalpur 25 | Jhabua,Jhabua 26 | Katni,Katni 27 | Mandla,Mandla 28 | Mandsaur,Mandsaur 29 | Morena,Morena 30 | Narsinghpur,Narsinghpur 31 | Neemuch,Neemuch 32 | Niwari,Niwari 33 | Panna,Panna 34 | Raisen,Raisen 35 | Rajgarh,Rajgarh 36 | Ratlam,Ratlam 37 | Rewa,Rewa 38 | 
Sagar,Sagar 39 | Satna,Satna 40 | Sehore,Sehore 41 | Seoni,Seoni 42 | Shahdol,Shahdol 43 | Shajapur,Shajapur 44 | Sheopur,Sheopur 45 | Shivpuri,Shivpuri 46 | Sidhi,Sidhi 47 | Singrauli,Singrauli 48 | Tikamgarh,Tikamgarh 49 | Ujjain,Ujjain 50 | Umaria,Umaria 51 | Vidisha,Vidisha 52 | Khargone,Khargone 53 | -------------------------------------------------------------------------------- /extract.meta: -------------------------------------------------------------------------------- 1 | http://www.rajswasthya.nic.in/, Rajasthan, Districts 2 | https://statedashboard.odisha.gov.in/,Odisha, Districts 3 | https://covid19dashboard.py.gov.in/QuarantineReport,Puducherry, Districts 4 | https://dashboard.kerala.gov.in/testing-view-public.php, Kerala, Districts 5 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 6 | http://covid19.ap.gov.in/Covid19_Admin/api/CV/DashboardCountAPI, Andhra Pradesh, Districts 7 | http://hmfw.ap.gov.in/covid_dashboard.aspx, Andhra Pradesh, 8 | http://covid19.itanagarsmartcity.in/covidstatus.php,Arunachal Pradesh, 9 | http://chdcovid19.in/, Chandigarh, 10 | https://nhm.goa.gov.in/,Goa, 11 | https://gujcovid19.gujarat.gov.in/, Gujarat, 12 | https://dashboard.kerala.gov.in/index.php, Kerala, 13 | https://corona.meghalayagov.in/, Meghalaya, 14 | https://covid19.nagaland.gov.in/,Nagaland, 15 | https://statedashboard.odisha.gov.in/, Odisha, 16 | https://covid19dashboard.py.gov.in/, Puducherry, 17 | https://covid19sikkim.org/, Sikkim, 18 | http://www.rajswasthya.nic.in/, Rajasthan, 19 | https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx, Tripura, 20 | -------------------------------------------------------------------------------- /extract.meta.bk: -------------------------------------------------------------------------------- 1 | http://www.rajswasthya.nic.in/, Rajasthan, Districts 2 | https://gujcovid19.gujarat.gov.in/, Gujarat, 3 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 4 | 
http://covid19.ap.gov.in/Covid19_Admin/api/CV/DashboardCountAPI, Andhra Pradesh, Districts 5 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 6 | https://statedashboard.odisha.gov.in/,Odisha, Districts 7 | https://covid19dashboard.py.gov.in/QuarantineReport,Puducherry, Districts 8 | https://dashboard.kerala.gov.in/testing-view-public.php, Kerala, Districts 9 | http://hmfw.ap.gov.in/covid_dashboard.aspx, Andhra Pradesh, 10 | http://covid19.itanagarsmartcity.in/covidstatus.php,Arunachal Pradesh, 11 | http://chdcovid19.in/, Chandigarh, 12 | https://gujcovid19.gujarat.gov.in/, Gujarat, 13 | https://nhm.goa.gov.in/,Goa, 14 | https://dashboard.kerala.gov.in/index.php, Kerala, 15 | https://corona.meghalayagov.in/, Meghalaya, 16 | https://covid19.nagaland.gov.in/,Nagaland, 17 | https://statedashboard.odisha.gov.in/, Odisha, 18 | https://covid19dashboard.py.gov.in/, Puducherry, 19 | https://covid19sikkim.org/, Sikkim, 20 | http://www.rajswasthya.nic.in/, Rajasthan, 21 | https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx, Tripura, 22 | -------------------------------------------------------------------------------- /automation/ocr/br_districts.meta: -------------------------------------------------------------------------------- 1 | वांका , Banka 2 | बका , Banka 3 | मधुवनी, Madhubani 4 | अररिया , Araria 5 | अरवल , Arwal 6 | औरंगाबाद , Aurangabad 7 | बांका , Banka 8 | बेगुसराय , Begusarai 9 | बेगूसराय , Begusarai 10 | वेगूसराय , Begusarai 11 | भागलपुर , Bhagalpur 12 | भोजपुर , Bhojpur 13 | बक्सर, Buxar 14 | वक्सर , Buxar 15 | दरभंगा , Darbhanga 16 | गया , Gaya 17 | गोपालगंज , Gopalganj 18 | जमुई , Jamui 19 | जहानाबाद , Jehanabad 20 | कैमूर , Kaimur 21 | कटिहार , Katihar 22 | खगड़िया , Khagaria 23 | किशनगंज , Kishanganj 24 | लखीसराय , Lakhisarai 25 | मधेपुरा , Madhepura 26 | मधुबनी , Madhubani 27 | मुंगेर , Munger 28 | मुजफ्फरपुर , Muzaffarpur 29 | नालंदा , Nalanda 30 | नालन्दा , Nalanda 31 | नवादा , Nawada 32 | पश्चिम , West Champaran 33 | पक्षिम, West Champaran 
34 | पश्चिम चंपारण, West Champaran 35 | पश्चिमी, West Champaran 36 | पटना , Patna 37 | पूर्वी चम्पारण, East Champaran 38 | पूर्वी, East Champaran 39 | पूर्वी चंपारण , East Champaran 40 | पूर्णिया , Purnia 41 | पूर्णियाँ , Purnia 42 | रोहतास , Rohtas 43 | सहरसा , Saharsa 44 | समस्तीपुर , Samastipur 45 | सारण , Saran 46 | शेखपुरा , Sheikhpura 47 | शिवहर , Sheohar 48 | सीतामढ़ी , Sitamarhi 49 | सिवान , Siwan 50 | सुपौल , Supaul 51 | सुपीत , Supaul 52 | वैशाली , Vaishali 53 | -------------------------------------------------------------------------------- /automation/ocr/ct_districts.meta: -------------------------------------------------------------------------------- 1 | दुर्ग, Durg 2 | दर्ग, Durg 3 | राजनांदगांव , Rajnandgaon 4 | राजनादगाव , Rajnandgaon 5 | राजनांदगाव , Rajnandgaon 6 | बालोद , Balod 7 | बेमेतरा , Bametara 8 | कबीरधाम , Kabeerdham 9 | रायपुर , Raipur 10 | रायगढ़ , Raigarh 11 | धमतरी , Dhamtari 12 | बलौदाबाज़ार , Baloda Bazar 13 | बलौदाबाजार , Baloda Bazar 14 | महासमुंद , Mahasamund 15 | महासमुद , Mahasamund 16 | गरियाबंद , Gariaband 17 | बिलासपुर , Bilaspur 18 | कोरबा , Korba 19 | जांजगीर - चांपा , Janjgir Champa 20 | मुंगेली , Mungeli 21 | मगेली , Mungeli 22 | मंगेली , Mungeli 23 | मगला , Mungeli 24 | गौरेला पेंड्रा मरवाही , Gaurela Pendra Marwahi 25 | गौरेला पेड़ा मरवाही , Gaurela Pendra Marwahi 26 | सरगुजा , Surguja 27 | कोरिया , Koriya 28 | सूरजपुर , Surajpur 29 | बलरामपुर , Balrampur 30 | बलासपुर , Balrampur 31 | जशपुर , Jashpur 32 | बस्तर , Bastar 33 | कोंडागांव , Kondagaon 34 | दंतेवाडा , Dakshin Bastar Dantewada 35 | दंतेवाड़ा , Dakshin Bastar Dantewada 36 | सुकमा , Sukma 37 | कांकेर , Uttar Bastar Kanker 38 | काकेर , Uttar Bastar Kanker 39 | काकर, Uttar Bastar Kanker 40 | नारायणपुर , Narayanpur 41 | बीजापुर , Bijapur 42 | बीजापूर , Bijapur 43 | अन्य , Other State 44 | अन्य राज्य , Other State 45 | -------------------------------------------------------------------------------- /automation/misc/tnfilegeneration.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import re 4 | 5 | 6 | def is_number(s): 7 | try: 8 | int(s) 9 | return True 10 | except ValueError: 11 | return False 12 | 13 | url = "http://www.nhmharyana.gov.in/WriteReadData/userfiles/file/CoronaVirus/Daily%20Bulletin%20of%20COVID%2019%20as%20on%209-06-2020%20Evening.pdf" 14 | r = requests.get(url, allow_redirects=True) 15 | open('hr.pdf', 'wb').write(r.content) 16 | 17 | with open("ka.pdf", "rb") as f: 18 | pdf = pdftotext.PDF(f) 19 | 20 | recoveryKa = open("ka.pdf.txt", "w") 21 | pid = input("Enter district page:") 22 | print(pdf[int(pid)] , file = recoveryKa) 23 | recoveryKa.close() 24 | 25 | tnFile = open('ka.pdf.txt', 'r') 26 | lines = tnFile.readlines() 27 | tnOutputFile = open('ka.csv', 'w') 28 | 29 | startedReadingDistricts = False 30 | for line in lines: 31 | if len(line) == 0: 32 | continue 33 | 34 | if 'Yadagiri' in line: 35 | startedReadingDistricts = True 36 | if 'Total' in line: 37 | startedReadingDistricts = False 38 | continue 39 | if startedReadingDistricts == False: 40 | continue 41 | 42 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 43 | 44 | linesArray = line.split(',') 45 | print(linesArray) 46 | 47 | tnOutputFile.close() 48 | -------------------------------------------------------------------------------- /automation/ocr/ocr_vision.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import io 4 | import pickle 5 | from google.cloud import vision 6 | 7 | fileName = "" 8 | 9 | def detect_text(path): 10 | """Detects text in the file.""" 11 | client = vision.ImageAnnotatorClient() 12 | 13 | with io.open(path, 'rb') as image_file: 14 | content = image_file.read() 15 | 16 | image = vision.Image(content=content) 17 | 18 | response = client.document_text_detection(image=image) 19 | texts = response.text_annotations 20 | print(texts) 21 | with 
io.open('poly.txt', 'w') as boundsFile: 22 | print(texts, file = boundsFile) 23 | boundsFile.close() 24 | 25 | # Save output 26 | 27 | for text in texts: 28 | vertices = (['{},{}'.format(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices]) 29 | print('{}'.format(text.description), end ="|") 30 | print('bounds|{}'.format('|'.join(vertices))) 31 | 32 | if response.error.message: 33 | raise Exception( 34 | '{}\nFor more info on error messages, check: ' 35 | 'https://cloud.google.com/apis/design/errors'.format( 36 | response.error.message)) 37 | 38 | 39 | def main(): 40 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../../../visionapi.json" 41 | path = fileName 42 | 43 | # Do OCR 44 | detect_text(path) 45 | 46 | if __name__ == "__main__": 47 | fileName = sys.argv[1] 48 | main() 49 | -------------------------------------------------------------------------------- /automation/ocr/tg_districts.meta: -------------------------------------------------------------------------------- 1 | Adilabad,Adilabad 2 | Bhadradri Kothagudem,Bhadradri Kothagudem 3 | Bhadradri kothagudem,Bhadradri Kothagudem 4 | GHMC,Hyderabad 5 | Jagtial,Jagtial 6 | Jagityal,Jagtial 7 | Jangaon,Jangaon 8 | Jayashankar Bhupalapally,Jayashankar Bhupalapally 9 | Jayashankar bhupalpally,Jayashankar Bhupalapally 10 | Jogulamba Gadwal,Jogulamba Gadwal 11 | Jogulamba gadwal,Jogulamba Gadwal 12 | Kamareddy,Kamareddy 13 | Karimnagar,Karimnagar 14 | Khammam,Khammam 15 | Komaram Bheem,Komaram Bheem 16 | Komarambheem asifabad,Komaram Bheem 17 | Mahabubabad,Mahabubabad 18 | Mahabubnagar,Mahabubnagar 19 | Mahaboobnagar,Mahabubnagar 20 | Mancherial,Mancherial 21 | Medak,Medak 22 | Medchal Malkajgiri,Medchal Malkajgiri 23 | Medchal malkajigiri,Medchal Malkajgiri 24 | Mulugu,Mulugu 25 | Nagarkurnool,Nagarkurnool 26 | Nalgonda,Nalgonda 27 | Narayanpet,Narayanpet 28 | Nirmal,Nirmal 29 | Nizamabad,Nizamabad 30 | Peddapalli,Peddapalli 31 | Rajanna Sircilla,Rajanna Sircilla 32 | Rajanna siricilla,Rajanna Sircilla 
33 | Ranga Reddy,Ranga Reddy 34 | Rangareddy,Ranga Reddy 35 | Sangareddy,Sangareddy 36 | Siddipet,Siddipet 37 | Suryapet,Suryapet 38 | Vikarabad,Vikarabad 39 | Wanaparthy,Wanaparthy 40 | Warangal Rural,Warangal Rural 41 | Warangal rural,Warangal Rural 42 | Warangal Urban,Warangal Urban 43 | Warangal urban,Warangal Urban 44 | Yadadri Bhuvanagiri,Yadadri Bhuvanagiri 45 | Yadadri, Yadadri Bhuvanagiri 46 | Yadadri bhonigir,Yadadri Bhuvanagiri 47 | -------------------------------------------------------------------------------- /automation/automation.meta: -------------------------------------------------------------------------------- 1 | Vaccine, VC, https://api.cowin.gov.in/api/v1/reports/getPublicReports?state_id=@@state_id@@&district_id=@@district_id@@&date=@@date@@ 2 | VCMohfw, VCM,https://raw.githubusercontent.com/datameet/covid19/master/downloads/mohfw-backup/cumulative_vaccination_coverage/ 3 | Andhra Pradesh, AP, http://hmfw.ap.gov.in/covid_dashboard.aspx 4 | Arunachal Pradesh, AR, 5 | #Goa, GA, https://www.goa.gov.in/covid-19/ 6 | Odisha, OR, https://health.odisha.gov.in/js/distDtls.js 7 | Rajasthan, RJ, http://www.rajswasthya.nic.in/ 8 | Maharashtra, MH, https://www.covid19maharashtragov.in/mh-covid/dbd-cases-file?_by=District&_by=Date 9 | Telangana, TG, 10 | Gujarat, GJ,https://gujcovid19.gujarat.gov.in/DrillDownCharts.aspx/GetDistDataForLineCovidDisrtict 11 | Uttar Pradesh, UP, 12 | Bihar, BR, 13 | Chhattisgarh, CT, 14 | Nagaland, NL, https://covid19.nagaland.gov.in/health-main 15 | Mizoram, MZ, 16 | Manipur, MN, 17 | Assam, AS, https://covid19.assam.gov.in/all-districts/ 18 | Tripura, TR, https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx 19 | Puducherry, PY, https://covid19dashboard.py.gov.in/Reporting/DateWise 20 | Chandigarh, CH, http://chdcovid19.in/ 21 | Kerala, KL, https://dashboard.kerala.gov.in/maps/outside.geojson 22 | KeralaDeaths, KLD, https://dashboard.kerala.gov.in/maps/outside.geojson 23 | Ladakh, LA, 
http://covid.ladakh.gov.in/#dataInsights 24 | Punjab, PB, 25 | Tamil Nadu, TN, 26 | Madhya Pradesh, MP, 27 | Karnataka, KA, 28 | West Bengal, WB, 29 | Haryana, HR, 30 | Himachal Pradesh, HP, 31 | Jammu and Kashmir, JK, 32 | Jharkhand, JH, 33 | Uttarakhand, UT, 34 | Meghalaya, ML, https://services7.arcgis.com/nzBTI19PTHBZaEPT/arcgis/rest/services/Admin_Boundary/FeatureServer/1/query?f=json&returnGeometry=false&outFields=*&where=1=1 35 | #Mizoram, MZ, https://mcovid19.mizoram.gov.in/api/home-stats 36 | -------------------------------------------------------------------------------- /automation/misc/kapatients.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import PyPDF2 as pypdf 4 | import camelot 5 | import re 6 | import datetime 7 | import matplotlib.pyplot as plt 8 | from deltaCalculator import DeltaCalculator 9 | 10 | 11 | def is_number(s): 12 | try: 13 | int(s) 14 | return True 15 | except ValueError: 16 | return False 17 | 18 | tables = camelot.read_pdf('ka.pdf',strip_text='\n', pages="5,6,7,8", split_text = True) 19 | 20 | print(len(tables)) 21 | for index, table in enumerate(tables): 22 | tables[index].to_csv('ka' + str(index) + '.csv') 23 | 24 | kaOutputFile = open('kafull.csv', 'w') 25 | for index, table in enumerate(tables): 26 | kaFile = open('ka' + str(index) + '.csv', 'r') 27 | lines = kaFile.readlines() 28 | 29 | for line in lines: 30 | line = line.replace('\"', '') 31 | linesArray = line.split(',') 32 | if len(linesArray[7]) == 0: 33 | continue 34 | 35 | gender = "" 36 | if linesArray[6].strip() == 'Female': 37 | gender = 'F' 38 | elif linesArray[6].strip() == 'Male': 39 | gender = 'M' 40 | else: 41 | gender = 'Non-Binary' 42 | 43 | print("{},{},{},{},{},{},{},{},{},{},{}".format(linesArray[4].replace('P-', 'KA-P'), datetime.date.today().strftime("%d/%m/%Y"), linesArray[5], gender,'',linesArray[7],'Karnataka', 'KA', 1, 'Hospitalized',linesArray[8]), file = kaOutputFile) 
44 | 45 | kaOutputFile.close() 46 | 47 | ##camelot.plot(tables[0], kind = "contour") 48 | #plt.show() 49 | 50 | 51 | """ 52 | pdfobject=open('ka.pdf','rb') 53 | pdf=pypdf.PdfFileReader(pdfobject) 54 | print(pdf.extractText()) 55 | 56 | url = "http://www.nhmharyana.gov.in/WriteReadData/userfiles/file/CoronaVirus/Daily%20Bulletin%20of%20COVID%2019%20as%20on%209-06-2020%20Evening.pdf" 57 | r = requests.get(url, allow_redirects=True) 58 | open('hr.pdf', 'wb').write(r.content) 59 | 60 | with open("ka.pdf", "rb") as f: 61 | pdf = pdftotext.PDF(f) 62 | 63 | recoveryKa = open("ka.pdf.txt", "w") 64 | pid = input("Enter district page:") 65 | print(pdf[int(pid)] , file = recoveryKa) 66 | recoveryKa.close() 67 | 68 | tnFile = open('ka.pdf.txt', 'r') 69 | lines = tnFile.readlines() 70 | tnOutputFile = open('ka.csv', 'w') 71 | 72 | startedReadingDistricts = False 73 | for line in lines: 74 | if len(line) == 0: 75 | continue 76 | print(line) 77 | 78 | if 'Yadagiri' in line: 79 | startedReadingDistricts = True 80 | if 'Total' in line: 81 | startedReadingDistricts = False 82 | continue 83 | if startedReadingDistricts == False: 84 | continue 85 | 86 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 87 | 88 | linesArray = line.split(',') 89 | print(linesArray) 90 | 91 | tnOutputFile.close() 92 | """ 93 | -------------------------------------------------------------------------------- /automation/ocr/up_districts.meta: -------------------------------------------------------------------------------- 1 | औरय्या , Auraiya 2 | सभल , Sambhal 3 | गोडा , Gonda 4 | गौतम बुद्ध नगर, Gautam Buddha Nagar 5 | बुध नगर , Gautam Buddha Nagar 6 | बुद्ध नगर, Gautam Buddha Nagar 7 | बनिया , Ballia 8 | अम्बेडकरनगर, Ambedkar Nagar 9 | अम्बेडकर, Ambedkar Nagar 10 | गाँडा , Gonda 11 | मयुरा , Mathura 12 | संतकबीर नगर , Sant Kabir Nagar 13 | सतकबारनगर, Sant Kabir Nagar 14 | सत कबार नगर, Sant Kabir Nagar 15 | समल , Sambhal 16 | सिद्घार्य नगर, Siddharthnagar 17 | संत कबीर, Sant Kabir Nagar 18 | सिद्धार्थनगर, 
Siddharthnagar 19 | सिद्धार्थनग, Siddharthnagar 20 | गौतम बुध नगर , Gautam Buddha Nagar 21 | संत कबीर नगर , Sant Kabir Nagar 22 | कानपुर नगर, Kanpur Nagar 23 | रायबरेली , Rae Bareli 24 | राय बरेली, Rae Bareli 25 | राय , Rae Bareli 26 | लखीमपुर , Lakhimpur Kheri 27 | लखीमपुर खीरी , Lakhimpur Kheri 28 | खीरी , Lakhimpur Kheri 29 | लखीमपुर - खीरी , Lakhimpur Kheri 30 | अमरोहा , Amroha 31 | मथुरा , Mathura 32 | आगरा , Agra 33 | मेरठ , Meerut 34 | गौतम , Gautam Buddha Nagar 35 | लखनऊ , Lucknow 36 | गाज़ियाबाद , Ghaziabad 37 | गाजियाबाद , Ghaziabad 38 | सहारनपुर , Saharanpur 39 | सहारनपर, Saharanpur 40 | वाराणसी , Varanasi 41 | रामपुर , Rampur 42 | जौनपुर , Jaunpur 43 | जौनपर , Jaunpur 44 | बस्ती ,Basti 45 | बाराबंकी , Barabanki 46 | अलीगढ , Aligarh 47 | अलीगढ़ , Aligarh 48 | अलीगढ़ , Aligarh 49 | अलीगढ़ , Aligarh 50 | कबीर नगर, Sant Kabir Nagar 51 | हापुड़, Hapur 52 | हापड , Hapur 53 | हापुड , Hapur 54 | अमेठी , Amethi 55 | बुलंदशहर , Bulandshahr 56 | बलंदशहर , Bulandshahr 57 | बुलदशहर , Bulandshahr 58 | अयोध्या , Ayodhya 59 | सिद्धार्थ नगर, Siddharthnagar 60 | सिद्धार्थ, Siddharthnagar 61 | गाजीपुर , Ghazipur 62 | गाजीपर , Ghazipur 63 | बिजनौर , Bijnor 64 | प्रयागराज , Prayagraj 65 | आजमगढ़ , Azamgarh 66 | आजमगढ , Azamgarh 67 | संभल , Sambhal 68 | बहराइच ,Bahraich 69 | सुल्तानपुर , Sultanpur 70 | संत , Sant Kabir Nagar 71 | प्रतापगढ़ , Pratapgarh 72 | गोरखपुर ,Gorakhpur 73 | कानपुर , Kanpur Nagar 74 | फिरोजाबाद , Firozabad 75 | मुरादाबाद , Moradabad 76 | मरादाबाद , Moradabad 77 | देवरिया , Deoria 78 | बरेली , Bareilly 79 | गोंडा , Gonda 80 | कौशाम्बी , Kaushambi 81 | अमेठी , Amethi 82 | मुजफ्फरनगर ,Muzaffarnagar 83 | इटावा , Etawah 84 | जालौन , Jalaun 85 | शामली , Shamli 86 | पीलीभीत , Pilibhit 87 | अम्बेडकर नगर ,Ambedkar Nagar 88 | फतेहपुर , Fatehpur 89 | महाराजगंज , Maharajganj 90 | सीतापुर , Sitapur 91 | हरदोई , Hardoi 92 | बदायूँ , Budaun 93 | बदार्य, Budaun 94 | बदायें , Budaun 95 | बदायूं , Budaun 96 | बलरामपुर , Balrampur 97 | कन्नौज , Kannauj 98 | झाँसी , 
Jhansi 99 | झांसी , Jhansi 100 | बलिया , Ballia 101 | मिर्जापुर , Mirzapur 102 | बागपत , Baghpat 103 | चित्रकूट , Chitrakoot 104 | श्रावस्ती ,Shrawasti 105 | भदोही , Bhadohi 106 | उन्नाव , Unnao 107 | मैनपुरी , Mainpuri 108 | फर्रुखाबाद ,Farrukhabad 109 | फरुखाबाद , Farrukhabad 110 | बाँदा , Banda 111 | बांदा , Banda 112 | औरैय्या ,Auraiya 113 | हाथरस , Hathras 114 | चंदौली , Chandauli 115 | चंदाली , Chandauli 116 | शाहजहांपुर , Shahjahanpur 117 | शाहजहापुर , Shahjahanpur 118 | एटा , Etah 119 | कासगंज , Kasganj 120 | मऊ , Mau 121 | कानपुर देहात , Kanpur Dehat 122 | देहात , Kanpur Dehat 123 | कुशीनगर , Kushinagar 124 | महोबा , Mahoba 125 | हमीरपुर , Hamirpur 126 | सोनभद्र , Sonbhadra 127 | ललितपुर , Lalitpur 128 | -------------------------------------------------------------------------------- /automation/nameMapping.meta: -------------------------------------------------------------------------------- 1 | Himachal Pradesh, Sirmour, Sirmaur 2 | Himachal Pradesh, L & Spiti, Lahaul and Spiti 3 | Himachal Pradesh, & Spiti, Lahaul and Spiti 4 | Arunachal Pradesh, Kessang, Pakke Kessang 5 | Arunachal Pradesh, Kumey, Kurung Kumey 6 | Arunachal Pradesh, Leparada, Lepa Rada 7 | Arunachal Pradesh, Dibang Valley, Upper Dibang Valley 8 | Arunachal Pradesh, Capital Region, Papum Pare 9 | Arunachal Pradesh, Capital Complex, Papum Pare 10 | Arunachal Pradesh, Kra - Daadi, Kra Daadi 11 | Arunachal Pradesh, Lower Dibang, Lower Dibang Valley 12 | Madhya Pradesh, Betull, Betul 13 | Jammu and Kashmir, Shopian, Shopiyan 14 | Jammu and Kashmir, Poonch, Punch 15 | Andhra Pradesh, Kadapa, Y.S.R. Kadapa 16 | Andhra Pradesh, Nellore, S.P.S. 
Nellore 17 | Andhra Pradesh, lOther States *, Other State 18 | Andhra Pradesh, lOther Countries **, Foreign Evacuees 19 | Andhra Pradesh, Other States *, Other State 20 | Andhra Pradesh, Other Countries **, Foreign Evacuees 21 | Andhra Pradesh, Other States, Other State 22 | Andhra Pradesh, Other Countries, Foreign Evacuees 23 | Andhra Pradesh, Ananthapur, Anantapur 24 | Odisha,Sonepur,Subarnapur 25 | Odisha,Khurdha,Khordha 26 | Odisha,Sundergarh,Sundargarh 27 | Odisha,Keonjhar,Kendujhar 28 | Odisha,Baragarh,Bargarh 29 | Odisha,Others *,Others 30 | Odisha,Nawarangpur,Nabarangapur 31 | Odisha,Bolangir,Balangir 32 | Odisha,Jagatsinghapur,Jagatsinghpur 33 | Odisha,Nabarangpur,Nabarangapur 34 | Odisha,Tstate Pool,State Pool 35 | Maharashtra,Ahmadnagar,Ahmednagar 36 | Maharashtra,Ahmadnagar,Ahmednagar 37 | Maharashtra,Amaravati,Amravati 38 | Maharashtra,Other States,Other State 39 | Maharashtra,Other states/country, Other State 40 | Mizoram,Siaha,Saiha 41 | Rajasthan,S. Madhopur, Sawai Madhopur 42 | Gujarat, Devbhoomi Dwarka, Devbhumi Dwarka 43 | Assam, Kamrup Metro, Kamrup Metropolitan 44 | Tripura, Sepahijala, Sipahijala 45 | Tripura, Unakoti, Unokoti 46 | Puducherry, Pondicherry, Puducherry 47 | Tamil Nadu, Pudukottai, Pudukkottai 48 | Tamil Nadu, Sivagangai, Sivaganga 49 | Tamil Nadu, Thirupathur, Tirupathur 50 | Tamil Nadu, Thiruvannamalai, Tiruvannamalai 51 | Tamil Nadu, Thoothukudi, Thoothukkudi 52 | Tamil Nadu, Trichy, Tiruchirappalli 53 | Tamil Nadu, Villupuram, Viluppuram 54 | Punjab, SBS Nagar, Shahid Bhagat Singh Nagar 55 | Punjab, Muktsar, Sri Muktsar Sahib 56 | Punjab, FG Sahib, Fatehgarh Sahib 57 | Punjab, SAS Nagar, S.A.S. 
Nagar 58 | Punjab, Ropar, Rupnagar 59 | Rajasthan, S.Madhopur, Sawai Madhopur 60 | Jharkhand, Saraikela, Saraikela-Kharsawan 61 | Jharkhand, Sahebgani, Sahebganj 62 | Jharkhand, Sahebganj, Sahibganj 63 | Haryana, Foreign returnee (USA), Foreign Evacuees 64 | Haryana, Charkhi, Charkhi Dadri 65 | Haryana, CharkhiDadri, Charkhi Dadri 66 | Haryana, Sonepat, Sonipat 67 | Haryana, Mohindergarh, Mahendragarh 68 | Haryana, Mahindergarh, Mahendragarh 69 | Karnataka, Gadaga, Gadag 70 | Karnataka, Bengaluru, Bengaluru Urban 71 | Karnataka, Vijayapur, Vijayapura 72 | Karnataka, Chikballapura, Chikkaballapura 73 | Karnataka, Chamarajanagar, Chamarajanagara 74 | Karnataka, Chikkamgaluru, Chikkamagaluru 75 | Karnataka, Davangere, Davanagere 76 | Karnataka, Mysore, Mysuru 77 | Karnataka, Uttar Kannada, Uttara Kannada 78 | Karnataka, Others, Other State 79 | Karnataka, Dakshin Kannada, Dakshina Kannada 80 | Karnataka, Yadagiri, Yadgir 81 | Karnataka, Yadgiri, Yadgir 82 | Karnataka, Koppala, Koppal 83 | Karnataka, Dharawada, Dharwad 84 | Karnataka, Dharwada, Dharwad 85 | Karnataka, Bagalakote, Bagalkote 86 | Karnataka, Bagalkot, Bagalkote 87 | Karnataka, Raichuru, Raichur 88 | Karnataka, Kolara, Kolar 89 | Karnataka, Hassana, Hassan 90 | Karnataka, Others*, Other State 91 | Karnataka, Bellary, Ballari 92 | Karnataka, Ramanagar, Ramanagara 93 | Karnataka, Kalaburgi, Kalaburagi 94 | Karnataka, Kalburgi, Kalaburagi 95 | Karnataka, Bangalore Urban, Bengaluru Urban 96 | Karnataka, Bangalore Rural, Bengaluru Rural 97 | Karnataka, Tumkur, Tumakuru 98 | Kerala, Kasargod, Kasaragod 99 | Kerala, Kozhikkode, Kozhikode 100 | West Bengal,Coochbehar,Cooch Behar 101 | West Bengal,Bankura*,Bankura 102 | West Bengal,Purba Bardhaman*, Purba Bardhaman 103 | Meghalaya, South -West Garo Hills, South West Garo Hills 104 | Meghalaya, Ri Bhoi, Ribhoi 105 | Meghalaya, Ri - Bhoi District, Ribhoi 106 | Meghalaya, RiBhoi, Ribhoi 107 | Uttarakhand, U.S. 
Nagar, Udham Singh Nagar 108 | Uttarakhand, Chamoll, Chamoli 109 | -------------------------------------------------------------------------------- /automation/ocr/ocr.sh: -------------------------------------------------------------------------------- 1 | customiseMetaConfig() { 2 | stateCode=$( echo $1 ) 3 | replacementLine=$( echo $2 ) 4 | sedString=$( echo $3 ) 5 | parameterStateCode=$( echo $3 | cut -d':' -f1 ) 6 | parametersToReplace=$( echo $3 | cut -d':' -f2 ) 7 | if [ "$stateCode" = "$parameterStateCode" ] 8 | then 9 | for param in $(echo $parametersToReplace | sed "s/,/ /g") 10 | do 11 | parameterToReplace=$( echo $param | cut -d'=' -f1 ) 12 | value=$( echo $param | cut -d'=' -f2 ) 13 | replacementSubString=$( echo "$replacementSubString;s/\\\$$parameterToReplace/$value/g" ) 14 | done 15 | fi 16 | echo $replacementLine | sed "$replacementSubString" 17 | } 18 | 19 | 20 | 21 | if (( $# != 4 && $# != 5 )) 22 | then 23 | echo "Usage: ./ocr.sh [Starting String] " 24 | exit 25 | fi 26 | 27 | format="ocr" 28 | 29 | skipOcr=0 30 | skipTable=0 31 | skipAutomation=0 32 | individualRecords=0 33 | 34 | if (( $# == 5 )) 35 | then 36 | for i in $(echo $5 | sed "s/,/ /g") 37 | do 38 | option=`echo $i |awk '{print tolower($0)}'` 39 | 40 | case $option in 41 | "all") 42 | ;; 43 | "ocr") 44 | skipOcr=1 45 | echo "**** Skipping OCR Generation ****" 46 | ;; 47 | "table") 48 | echo "**** Skipping CSV Generation ****" 49 | skipTable=1 50 | ;; 51 | "automation") 52 | echo "**** Skipping Automation ****" 53 | skipAutomation=1 54 | ;; 55 | "ocr,table") 56 | echo "**** Skipping OCR, CSV Generation ****" 57 | skipOcr=1 58 | skipTable=1 59 | ;; 60 | "individual") 61 | individualRecords=1 62 | ;; 63 | "f1") 64 | echo "**** Using format type 1 for UP ****" 65 | format="ocr1" 66 | ;; 67 | "f2") 68 | echo "**** Using format type 2 for UP ****" 69 | format="ocr2" 70 | ;; 71 | esac 72 | done 73 | fi 74 | 75 | stateCode="" 76 | case $2 in 77 | "Bihar") 78 | stateCode="br" 79 | ;; 80 | 
"Uttar Pradesh") 81 | stateCode="up" 82 | ;; 83 | "Madhya Pradesh") 84 | stateCode="mp" 85 | ;; 86 | "Jharkhand") 87 | stateCode="jh" 88 | ;; 89 | "Rajasthan") 90 | stateCode="rj" 91 | ;; 92 | "Punjab") 93 | stateCode="pb" 94 | ;; 95 | "Jammu and Kashmir") 96 | stateCode="jk" 97 | ;; 98 | "Haryana") 99 | stateCode="hr" 100 | ;; 101 | "Andhra Pradesh") 102 | stateCode="ap" 103 | ;; 104 | "Maharashtra") 105 | stateCode="mh" 106 | ;; 107 | "Himachal Pradesh") 108 | stateCode="hp" 109 | ;; 110 | "Chhattisgarh") 111 | stateCode="ct" 112 | ;; 113 | "Uttarakhand") 114 | stateCode="ut" 115 | ;; 116 | "Arunachal Pradesh") 117 | stateCode="ar" 118 | ;; 119 | "Gujarat") 120 | stateCode="gj" 121 | ;; 122 | "Tamil Nadu") 123 | stateCode="tn" 124 | ;; 125 | "Nagaland") 126 | stateCode="nl" 127 | ;; 128 | "Telangana") 129 | stateCode="tg" 130 | ;; 131 | "Karnataka") 132 | stateCode="ka" 133 | ;; 134 | "Sikkim") 135 | stateCode="sk" 136 | ;; 137 | "Mizoram") 138 | stateCode="mz" 139 | ;; 140 | "Meghalaya") 141 | stateCode="ml" 142 | ;; 143 | "Kerala") 144 | stateCode="kl" 145 | ;; 146 | "Assam") 147 | stateCode="as" 148 | ;; 149 | "Manipur") 150 | stateCode="mn" 151 | ;; 152 | 153 | *) 154 | stateCode="invalid" 155 | esac 156 | 157 | echo -e "\n********************* If you want to see the ocr data, cat output.txt *********************\n" 158 | 159 | if (( $skipOcr != 1 )) 160 | then 161 | echo -e "\n******** Calling google vision api *******" 162 | python3 ocr_vision.py $1 > bounds.txt 163 | fi 164 | 165 | if (( $skipTable != 1 )) 166 | then 167 | replacementLine="s/@@statename@@/\$stateCode/g;s/@@yInterval@@/\$yInterval/g;s/@@xInterval@@/\$xInterval/g;s/@@houghTransform@@/\$houghTransform/g;s/@@enableTranslation@@/\$enableTranslation/g;s/@@startingText@@/\$startingText/g;s/@@configMinLineLength@@/\$configMinLineLength/g;" 168 | 169 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "hp:houghTransform=False,yInterval=5" ) 170 | replacementLine=$( 
customiseMetaConfig $stateCode $replacementLine "br:houghTransform=False" ) 171 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "mp:houghTransform=False" ) 172 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ap:configMinLineLength=300" ) 173 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "tn:configMinLineLength=500" ) 174 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "tg:enableTranslation=True" ) 175 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "mz:houghTransform=False" ) 176 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ml:configMinLineLength=250" ) 177 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ut:houghTransform=False" ) 178 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "nl:configMinLineLength=250" ) 179 | 180 | configMinLineLength=400 181 | enableTranslation=`echo $4` 182 | startingText=`echo $3` 183 | houghTransform="True" 184 | yInterval=0 185 | xInterval=0 186 | 187 | finalReplacementString=$( echo $replacementLine | sed "s/\$stateCode/$stateCode/g; s/\$yInterval/$yInterval/g; s/\$xInterval/$xInterval/g; s/\$houghTransform/$houghTransform/g; s/\$enableTranslation/$enableTranslation/g; s/\$startingText/$startingText/g; s/\$configMinLineLength/$configMinLineLength/g" ) 188 | 189 | echo $finalReplacementString 190 | 191 | sed "$finalReplacementString" ocrconfig.meta.orig > ocrconfig.meta 192 | 193 | echo -e "\n******** Using ocrconfig.meta, change ocrconfig.meta.orig for x and y intervals ******* " 194 | cat ocrconfig.meta 195 | echo -e "******** ++++++++ *******" 196 | python3 googlevision.py ocrconfig.meta $1 197 | fi 198 | 199 | cp output.txt ../.tmp/$stateCode.txt 200 | 201 | if (( $skipAutomation != 1 && $individualRecords != 1 )) 202 | then 203 | cd .. 
204 | echo -e "\n******** Calling automation.py for $2 ******* " 205 | python3 ./automation.py "$2" "full" $format 206 | fi 207 | -------------------------------------------------------------------------------- /automation/kaautomation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import csv 4 | import requests 5 | import camelot 6 | import re 7 | import datetime 8 | from deltaCalculator import DeltaCalculator 9 | 10 | deltaCalculator = DeltaCalculator(True) 11 | category = "d" 12 | 13 | def readPDF(): 14 | global category 15 | """ 16 | r = requests.get(sys.argv[1], allow_redirects=True) 17 | print("URL: " + sys.argv[1]) 18 | open(".tmp/ka.pdf", 'wb').write(r.content) 19 | """ 20 | 21 | print(10*"-" + " Deceased details (IGNORE THE FIRST TWO LINES) " + 10*"-") 22 | if len(sys.argv) == 4: 23 | category = sys.argv[1] 24 | startPid = sys.argv[2] 25 | endPid = sys.argv[3] 26 | else: 27 | category = input("Enter c/r/d : ") 28 | startPid = input("Enter start page number: ") 29 | endPid = input("Enter end page number: ") 30 | 31 | pages = "" 32 | for i in range(int(startPid), int(endPid) + 1): 33 | pages = pages + "," + str(i) if len(pages) != 0 else str(i) 34 | print(f"Processing pages {pages}") 35 | 36 | tables = camelot.read_pdf('.tmp/KA.pdf',strip_text='\n', pages=pages, split_text = True) 37 | 38 | for index, table in enumerate(tables): 39 | tables[index].to_csv('.tmp/ka' + str(index) + '.csv') 40 | 41 | processTmpFiles(tables) 42 | 43 | 44 | def processTmpFiles(tables): 45 | kaOutputFile = open('kaconfirmed.csv', 'w') 46 | csvWriter = csv.writer(kaOutputFile, delimiter=',', quotechar='"') 47 | linesToWrite = [] 48 | lineNumber = 0 49 | for index, table in enumerate(tables): 50 | kaFile = open('.tmp/ka' + str(index) + '.csv', 'r') 51 | with open('.tmp/ka' + str(index) + '.csv', newline='') as kaFile: 52 | rowReader = csv.reader(kaFile, delimiter=',', quotechar='"') 53 | for row in 
rowReader: 54 | line = '|'.join(row) 55 | line = re.sub('^\|', '', line) 56 | if len(re.sub('^\|+', '', line)) == 0: 57 | continue 58 | if 'Page' in line: 59 | continue 60 | #line = re.sub('\|$', '', re.sub('^\|+', '', line.replace('\"', '').replace(',,', ','))) 61 | 62 | linesArray = line.split('|') 63 | 64 | if category == "c": 65 | confirmedFileWriter(linesArray, linesToWrite) 66 | 67 | if category == "r": 68 | recoveredFileWriter(linesArray, linesToWrite) 69 | 70 | if category == "d": 71 | deceasedFileWriter(linesArray, linesToWrite) 72 | 73 | kaFile.close() 74 | for row in linesToWrite: 75 | csvWriter.writerow(row) 76 | kaOutputFile.close() 77 | 78 | 79 | def is_number(s): 80 | try: 81 | int(s) 82 | return True 83 | except ValueError: 84 | return False 85 | 86 | 87 | def confirmedFileWriter(linesArray, linesToWrite): 88 | ''' 89 | if len(linesArray) != 8 or len(linesArray[5]) == 0: 90 | print("Ignoring {}".format(linesArray)) 91 | return "" 92 | ''' 93 | 94 | gender = "" 95 | if linesArray[5].strip() == 'Female': 96 | gender = 'F' 97 | elif linesArray[5].strip() == 'Male': 98 | gender = 'M' 99 | else: 100 | gender = 'Non-Binary' 101 | 102 | districtName = "" 103 | districtName = deltaCalculator.getNameMapping('Karnataka', linesArray[6]) 104 | 105 | if len(linesArray[3]) == 0 and len(linesToWrite) != 0: 106 | print("Processing: {}".format(linesArray)) 107 | for index, cellValue in enumerate(linesArray): 108 | if len(cellValue) > 0 and index == 4: 109 | linesToWrite[len(linesToWrite) - 1][2] = str(linesToWrite[len(linesToWrite) - 1][2]) + " " + str(cellValue) 110 | if len(cellValue) > 0 and index == 7: 111 | linesToWrite[len(linesToWrite) - 1][11] = str(linesToWrite[len(linesToWrite) - 1][11]) + " " + str(cellValue) 112 | if len(cellValue) > 0 and index == 6: 113 | linesToWrite[len(linesToWrite) - 1][5] = linesToWrite[len(linesToWrite) - 1][5] + " " + str(cellValue) 114 | return 115 | patientNumber = linesArray[3].replace('P-', 'KA-P') if 'P' in linesArray[3] 
else "KA-P" + str(linesArray[3]) 116 | 117 | linesToWrite.append([patientNumber, datetime.date.today().strftime("%d/%m/%Y"), linesArray[4], gender, '', districtName, 'Karnataka', 'KA', 1, 'Hospitalized','', linesArray[7]]) 118 | 119 | def recoveredFileWriter(linesArray, linesToWrite): 120 | """ 121 | if len(linesArray) < 3: 122 | print("Ignoring {} ".format(linesArray)) 123 | return "" 124 | """ 125 | 126 | districtName = linesArray[1].split('(')[0].strip() 127 | districtName = deltaCalculator.getNameMapping('Karnataka', districtName) 128 | 129 | patientIds = re.sub('\.', '', re.sub('&', ',', re.sub(' +', ',', linesArray[3]))) 130 | patientIdArray = patientIds.split(',') 131 | 132 | if len(linesArray[2]) == 0 and len(linesToWrite) != 0 and len(patientIdArray) > 0: 133 | districtName = linesToWrite[len(linesToWrite) - 1][5] 134 | 135 | for item in patientIdArray: 136 | if len(item) == 0 or item == "0": #or is_number(item) or '(' in item: 137 | continue 138 | if item == "\n": 139 | continue 140 | patientNumber = item.replace('P-', '').replace('\n', '') if 'P' in item else str(item) 141 | linesToWrite.append([patientNumber, datetime.date.today().strftime("%d/%m/%Y"), '', '','',districtName,'Karnataka', 'KA', 1, 'Recovered']) 142 | #csvWriter.writerow([item.replace('P-', 'KA-P').replace('\n', ''), datetime.date.today().strftime("%d/%m/%Y"), '', '','',districtName,'Karnataka', 'KA', 1, 'Recovered']) 143 | 144 | 145 | def deceasedFileWriter(linesArray, linesToWrite): 146 | """ 147 | if len(linesArray) < 8 or len(linesArray[1]) == 0: 148 | print("Ignoring {} ".format(linesArray)) 149 | return "" 150 | """ 151 | if len(linesArray[0]) == 0: 152 | linesArray.pop(0) 153 | 154 | #print(linesArray) 155 | districtName = linesArray[1].strip() 156 | districtName = deltaCalculator.getNameMapping('Karnataka', districtName) 157 | description = "" 158 | if len(linesArray) < 5: 159 | return 160 | for index, data in enumerate(linesArray): 161 | if index < 5: 162 | continue 163 | else: 
164 | description = description + ";" + data if len(description) > 0 else data 165 | #csvWriter.writerow(["KA-P" + str(linesArray[2]), datetime.date.today().strftime("%d/%m/%Y"), linesArray[3], linesArray[4], '', districtName, 'Karnataka', 'KA', 1, 'Deceased', '', description]) 166 | if len(linesArray) > 3 and len(linesArray[2]) == 0 and len(linesToWrite) != 0: 167 | for index, cellValue in enumerate(linesArray): 168 | if len(cellValue) > 0 and index == 3: 169 | linesToWrite[len(linesToWrite) - 1][2] = str(linesToWrite[len(linesToWrite) - 1][2]) + " " + str(cellValue) 170 | if len(cellValue) > 0 and index == 4: 171 | linesToWrite[len(linesToWrite) - 1][3] = str(linesToWrite[len(linesToWrite) - 1][3]) + " " + str(cellValue) 172 | return 173 | linesToWrite.append([str(linesArray[2]), datetime.date.today().strftime("%d/%m/%Y"), linesArray[3], linesArray[4], '', districtName, 'Karnataka', 'KA', 1, 'Deceased', '', description]) 174 | 175 | readPDF() 176 | -------------------------------------------------------------------------------- /automation/deltaCalculator.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import logging 4 | import re 5 | import sys 6 | import csv 7 | 8 | logging.basicConfig(filename='deltaCalculator.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) 9 | 10 | class DeltaCalculator: 11 | def __init__(self, lightLoad = False): 12 | if lightLoad == False: 13 | self.buildJson() 14 | 15 | self.nameMapping = {} 16 | self.loadMetaData() 17 | 18 | def buildJson(self): 19 | self.covidDashboardData = {} 20 | self.modifiedDashboardData = requests.request("get", "https://api.covid19india.org/csv/latest/district_wise.csv") 21 | decoded_content = self.modifiedDashboardData.content.decode('utf-8') 22 | 23 | cr = csv.reader(decoded_content.splitlines(), delimiter=',') 24 | rows = list(cr) 25 | for index, row in enumerate(rows): 26 | if index == 0: 27 | 
continue 28 | if row[2] not in self.covidDashboardData: 29 | self.covidDashboardData[row[2]] = {} 30 | 31 | if 'statecode' not in self.covidDashboardData[row[2]]: 32 | self.covidDashboardData[row[2]]['statecode'] = row[1] 33 | 34 | if 'districtData' not in self.covidDashboardData[row[2]]: 35 | self.covidDashboardData[row[2]]['districtData'] = {} 36 | 37 | if row[4] not in self.covidDashboardData[row[2]]['districtData']: 38 | self.covidDashboardData[row[2]]['districtData'][row[4]] = {} 39 | 40 | self.covidDashboardData[row[2]]['districtData'][row[4]]['confirmed'] = int(row[5]) 41 | self.covidDashboardData[row[2]]['districtData'][row[4]]['confirmed'] = int(row[5]) 42 | self.covidDashboardData[row[2]]['districtData'][row[4]]['recovered'] = int(row[7]) 43 | self.covidDashboardData[row[2]]['districtData'][row[4]]['migratedother'] = int(row[9]) 44 | self.covidDashboardData[row[2]]['districtData'][row[4]]['deceased'] = int(row[8]) 45 | self.covidDashboardData[row[2]]['districtData'][row[4]]['active'] = int(row[6]) 46 | 47 | def getNameMapping(self, stateName, districtName): 48 | mappedDistrict = "" 49 | try: 50 | nameMapping = self.nameMapping[stateName] 51 | mappedDistrict = nameMapping[districtName] 52 | except KeyError: 53 | mappedDistrict = districtName 54 | 55 | return mappedDistrict 56 | 57 | def isDistrictPresent(self, stateName, districtName): 58 | try: 59 | self.covidDashboardData[stateName][districtName] 60 | return True 61 | except KeyError: 62 | return False 63 | 64 | def loadMetaData(self): 65 | with open("nameMapping.meta", "r") as metaFile: 66 | lineArray = [] 67 | for line in metaFile: 68 | lineArray = line.split(',') 69 | districtMapping = {} 70 | 71 | currentDictionary = {} 72 | if lineArray[0] not in self.nameMapping: 73 | self.nameMapping[lineArray[0]] = {} 74 | 75 | currentDictionary = self.nameMapping[lineArray[0]] 76 | currentDictionary[lineArray[1].strip()] = re.sub('\n', '', lineArray[2].strip()) 77 | self.nameMapping[lineArray[0]] = 
currentDictionary 78 | 79 | 80 | def getStateDataFromSite(self, stateName, stateDataFromStateDashboard, options): 81 | logging.info(stateDataFromStateDashboard) 82 | stateData = self.covidDashboardData[stateName]['districtData'] 83 | stateCode = self.covidDashboardData[stateName]['statecode'] 84 | print("\n" + '*' * 20 + stateName + '*' * 20) 85 | try: 86 | nameMapping = self.nameMapping[stateName] 87 | except KeyError: 88 | nameMapping = {} 89 | 90 | confirmedDeltaArray = [] 91 | recoveredDeltaArray = [] 92 | deceasedDeltaArray = [] 93 | activeDeltaArray = [] 94 | migratedDeltaArray = [] 95 | districts = [] 96 | stateTotalFromStateDashboard = {'confirmed': 0, 'recovered': 0, 'deceased': 0} 97 | siteTotalFromStateDashboard = {'confirmed': 0, 'recovered': 0, 'deceased': 0} 98 | errorArray = [] 99 | districtMap = {} 100 | 101 | for districtDetails in stateDataFromStateDashboard: 102 | try: 103 | districtName = nameMapping[districtDetails['districtName']] if districtDetails['districtName'] in nameMapping else districtDetails['districtName'] 104 | outputString = "" 105 | 106 | stateTotalFromStateDashboard['confirmed'] += districtDetails['confirmed'] if districtDetails['confirmed'] != -999 else 0 107 | stateTotalFromStateDashboard['recovered'] += districtDetails['recovered'] if districtDetails['recovered'] != -999 else 0 108 | stateTotalFromStateDashboard['deceased'] += districtDetails['deceased'] if districtDetails['deceased'] != -999 else 0 109 | 110 | siteTotalFromStateDashboard['confirmed'] += stateData[districtName]['confirmed'] 111 | siteTotalFromStateDashboard['recovered'] += stateData[districtName]['recovered'] 112 | siteTotalFromStateDashboard['deceased'] += stateData[districtName]['deceased'] 113 | 114 | confirmedDelta = districtDetails['confirmed'] - stateData[districtName]['confirmed'] if districtDetails['confirmed'] != -999 else "NA" 115 | recoveredDelta = districtDetails['recovered'] - stateData[districtName]['recovered'] if districtDetails['recovered'] != 
-999 else "NA" 116 | deceasedDelta = districtDetails['deceased'] - stateData[districtName]['deceased'] if districtDetails['deceased'] != -999 else "NA" 117 | activeDelta = 0 118 | migratedDelta = 0 119 | 120 | if 'migrated' in districtDetails.keys(): 121 | migratedDelta = districtDetails['migrated'] - stateData[districtName]['migratedother'] 122 | 123 | if 'active' in districtDetails.keys(): 124 | activeDelta = districtDetails['active'] - (stateData[districtName]['confirmed'] - stateData[districtName]['deceased'] - stateData[districtName]['recovered']) 125 | except KeyError: 126 | errorArray.append("--> ERROR: Failed to find key mapping for district: {}, state: {}".format(districtName, stateName)) 127 | continue 128 | 129 | if not options: 130 | outputString = districtName + ", " + str(confirmedDelta) + ", " + str(recoveredDelta) + ", " + str(deceasedDelta) 131 | print(outputString) 132 | if options == "detailed" or options == "full" or options == "fullActive": 133 | districts.append(districtName) 134 | confirmedDeltaArray.append(confirmedDelta) 135 | recoveredDeltaArray.append(recoveredDelta) 136 | deceasedDeltaArray.append(deceasedDelta) 137 | activeDeltaArray.append(activeDelta) 138 | migratedDeltaArray.append(migratedDelta) 139 | 140 | stateConfirmedDelta = stateTotalFromStateDashboard['confirmed'] - siteTotalFromStateDashboard['confirmed'] 141 | stateRecoveredDelta = stateTotalFromStateDashboard['recovered'] - siteTotalFromStateDashboard['recovered'] 142 | stateDeceasedDelta = stateTotalFromStateDashboard['deceased'] - siteTotalFromStateDashboard['deceased'] 143 | 144 | if options == "detailed": 145 | districts.append('Total') 146 | confirmedDeltaArray.append(stateConfirmedDelta) 147 | recoveredDeltaArray.append(stateRecoveredDelta) 148 | deceasedDeltaArray.append(stateDeceasedDelta) 149 | 150 | self.printDistricts(self.printDeltas(confirmedDeltaArray, "Confirmed"), districts) 151 | self.printDistricts(self.printDeltas(recoveredDeltaArray, "Recovered"), 
districts) 152 | self.printDistricts(self.printDeltas(deceasedDeltaArray, "Deceased"), districts) 153 | elif options == "full": 154 | self.printFullDetails(confirmedDeltaArray, "Hospitalized", stateName, stateCode, districts) 155 | self.printFullDetails(recoveredDeltaArray, "Recovered", stateName, stateCode, districts) 156 | self.printFullDetails(deceasedDeltaArray, "Deceased", stateName, stateCode, districts) 157 | if 'migrated' in districtDetails.keys(): 158 | self.printFullDetails(migratedDeltaArray, "Migrated_Other", stateName, stateCode, districts) 159 | elif options == "fullActive": 160 | self.printFullDetails(activeDeltaArray, "Active", stateName, stateCode, districts) 161 | return 162 | else: 163 | print("Total delta, {}, {}, {}".format(stateConfirmedDelta, stateRecoveredDelta, stateDeceasedDelta)) 164 | 165 | print("StateTotal, {}, {}, {}".format(stateTotalFromStateDashboard['confirmed'], stateTotalFromStateDashboard['recovered'], stateTotalFromStateDashboard['deceased'])) 166 | print("SiteTotal, {}, {}, {}".format(siteTotalFromStateDashboard['confirmed'], siteTotalFromStateDashboard['recovered'], siteTotalFromStateDashboard['deceased'])) 167 | 168 | if len(errorArray) > 0: 169 | for error in errorArray: 170 | print(error) 171 | 172 | def printFullDetails(self, deltaArray, category, stateName, stateCode, districts): 173 | with open("output2.txt", "w+") as f: 174 | for index, data in enumerate(deltaArray): 175 | if data != 0 and data != "NA": 176 | print("{},{},{},{},{}".format(districts[index], stateName, stateCode, data, category), file=f) 177 | print("{},{},{},{},{}".format(districts[index], stateName, stateCode, data, category)) 178 | 179 | def printDeltas(self, deltaArray, category): 180 | print('-' * 20 + category + '-' * 20) 181 | printIndex = [] 182 | for index, data in enumerate(deltaArray): 183 | if data != 0 and data != "NA": 184 | print(data) 185 | printIndex.append(index) 186 | 187 | return printIndex 188 | 189 | def printDistricts(self, 
printIndex, districts): 190 | for data in printIndex: 191 | print(districts[data]) 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ***A newer, cleaner version is being written [here](https://github.com/bee-rickey/covid_bulletin_automator). WIP*** 2 | 3 | ***A scrapper for multiple covid state websites. Triedcatched's ghost!*** 4 | 5 | ***Used by www.covid19india.org admin teams. Not for general consumption :P :D*** 6 | 7 | 8 | # Usage 9 | Currently there are three types of bulletins: 10 | 11 | 1. Images - AP, AR, BR, CT, JH, JK, HP, MH, MP, NL, PB, RJ, TG, TN, UK, UP 12 | 2. PDFs - HR, KA, KL, PB, TN, WB 13 | 3. Dashboards - CH, GJ, ML, OR, PY, TR, Vaccines 14 | 15 | For all those where ocr is supported (optical character recognition using google vision api), the command to run is: 16 | ```./ocr.sh "fully qualified path to image" "State Name" "starting text, ending text" "True/False" "ocr/table/automation"``` 17 | 18 | Parameter description: 19 | 1. "fully qualified path to image": Example "./home/covid/mh.jpg" The path cannot be relative path but it should have the fully qualified path. 20 | 2. "State Name": This is the state for which the image is being passed. Example: "Andhra Pradesh". 21 | 3. "starting text, ending text": This is the starting text of an image which considered to be the begining of a bulletin. In case you want auto detection to kick in, use "auto,auto". In some of the cases, if the bulletin has a text above the table with district names, consider cropping the image to have only the table with district data. 22 | 4. "True/False": This parameter is used in case you want to translate the district name (True: yes, please translate. False: No, do not translate). As of now this is applicable only to UP and BR bulletins. 23 | 5. 
"ocr/table/automation": This is an option provided where in case you want to skip one or more of the steps (ocr, table creation or automation.py run), you can provide those steps in comma separated manner. Example: "ocr,automation" will skip both ocr step and the automation step. "ocr,table" will skip image reading and table creation, but will run automation.py step to compute the delta. 24 | 25 | 26 | # How does ocr.sh work? 27 | ![Detailed Flow of ocr.sh](detailedflow.png) 28 | 29 | For any bulletin to be parsed, we use Google Vision API free tier. All the steps are called via ocr.sh. 30 | 1. First, [google vision api is called](https://github.com/bee-rickey/webScraper/blob/68441dbbd0aff5980b8984bcd2cee701950e96c9/automation/ocr/ocr.sh#L159) on the image to generate bounds.txt file. This is a direct output from the Google Vision API. This needs to be parsed to figure out the tablular structure. 31 | 2. Next, "ocrconfig.meta.orig" [file is parsed](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L188) and this generates ocrconfig.meta file. This file is used to tweak the way a table is interpreted in a bulletin. 32 | 3. ocr.sh internaly [invokes googlevision.py](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L193) file as well. This file is responsible for using ocrconfig.meta file and read the output generated by the Google Vision API (bounds.txt). The output of this step is generation of an output.txt file. This file has textually converted data of the image passed (basically a csv file with a row for each district given in the image). 33 | 4. 
In the last step, [the bounds.txt file is copied](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L196) into automation/.tmp/stateCode.txt and [automation.py is invoked](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L202) to generate the delta values for the state across all districts. 34 | 35 | NOTES: 36 | - Since output.txt is an intermediate file, in case there are issues wrt bulletin being converted into text, then, the feature of skipping ocr and table generation can be used after correcting values in output.txt. Example: *./ocr.sh auto,auto False "ocr,table"*. 37 | - OCR is heavily dependent on how good the image quality is. If the quality of image is bad, the output of google vision api might not be good enough to generate data. 38 | - Since googlevision.py script tries to auto identify the bulletin table, it always searches for district names and assumes the line with the first occurrence of a district name is the starting of the table. Hence, in case there are notes above a table with district names, the image has to be cropped to remove the text above the table. 39 | 40 | 41 | # How does googlevision.py work? 42 | 1. Google Vision API gives each text that it recognises and coordinates of a rectangle around the text it matches. 43 | Example: 44 | ``` 45 | 9248|bounds|245,326|281,326|281,343|245,343 46 | ``` 47 | This shows 9248 was found with bottom left coordinate of 245,326, bottom right of 281,326, top right of 281,343 and top left of 245,343 48 | 49 | The idea is to use this information to figure out which all texts in an image fall on the same lines and same columns. 50 | 2. googlevision.py uses the bounds.txt file which contains this information to generate an internal class per text.
This class has the following [definition](https://github.com/bee-rickey/webScraper/blob/47182b314849e1f99ea48a3c537c3a1104513560/automation/ocr/googlevision.py#L38): 51 | ``` 52 | class cellItem: 53 | def __init__(self, value, x, y, lbx, lby, w, h, col, row, index): 54 | self.value = value 55 | self.x = x 56 | self.y = y 57 | self.col = col 58 | self.row = row 59 | self.index = index 60 | self.lbx = lbx 61 | self.lby = lby 62 | self.h = h 63 | self.w = w 64 | ``` 65 | Definitions: 66 | ``` 67 | x - mid point of the text in x direction 68 | y - mid point of the text in y direction 69 | col - a column number assigned to the text (all texts that fall with same x coordinate with a given tolerance will have the same col number) 70 | row - a row number assigned to the text (all texts that fall with same y coordinate with a given tolerance will have the same row number) 71 | index - a unique number identifying each text 72 | lbx - the left bottom x coordinate of the text (used for drawing a rectangle around the text) 73 | lby - the left bottom y coordinate of the text (used for drawing a rectangle around the text) 74 | h - height of the text (calculated using left top y - left bottom y) 75 | w - width of the text (calculated using right bottom x - left bottom x) 76 | ``` 77 | 3. For each text found in the image, using it's rectangular coordinates, the mid points are calculated. 78 | 4. The next steps involve figuring out the row and column numbers. For this the logic is simple: 79 | - If the x coordinates are same, then the lie on the same column (in case hough transformation is used, all texts within the bounds of two consecutive lines should have the same column number). 80 | - If the y coordinates are same, then they lie on the same row. 81 | However, [a tolerance](https://github.com/bee-rickey/webScraper/blob/47182b314849e1f99ea48a3c537c3a1104513560/automation/ocr/googlevision.py#L321) is considered while arriving at col and row numbers. 82 | 5. 
In order to arrive at the rows (lines) that matter, the starting text and the ending text parameters are used. The moment a line with the starting text is encountered, it is assumed to be the first line of the table. If the starting text is kept as "auto", in that case, the code checks for the first line containing a district name as the starting of the table. 83 | 6. Next step is to print all those with the same row in the same line but with a sorting on the x coordinate (column value). While printing some of the corner scenarios like district names with space need to be considered and handled. 84 | 7. In case the district names are in Hindi, then before printing, the text has to be converted into English using a translation dictionary which is used. 85 | 8. The output is put into a file named output.txt. This file will have a 1-1 conversion of the bulletin table that has districts information. 86 | 87 | # How does automation.py work? 88 | 1. automation.py uses api endpoint at covid19india.org to figure out the difference per district from bulletin to the api endpoint. 89 | 2. automation.py has different modes of operation - ocr, pdf, dashboard. 90 | - For ocr, the .tmp/statecode.txt file is used to compute the delta (this comes from ocr.sh run). 91 | - For pdfs, pdftotext and camelot are used to convert a pdf into a csv file and then use it for delta calculation. 92 | - For dashboards, beautifulsoup or sometimes plain json pulls are used to get the information to calculate the delta. 93 | 3. In case of pdfs, there's an option to specify which page number to read and parse. The format is: 94 | ``` 95 | ./automation.py "statename" full pdf== 96 | ./automation.py "stateName" full pdf== (this in case you manually place the pdf as .tmp/stateCode.pdf) 97 | ``` 98 | 4. For dashboards, a meta file automation.meta has the dashboard endpoint from which to read and parse the data. 99 | 5. 
For each state, there has to be an entry in automation.meta file (even if it's driven by ocr). The meta file has the stateCode to consider for picking up the file from .tmp folder. The state code also allows for standardization of code. Each state has a GetData() function which acts as the entry point for the calculations. Example: 100 | ``` 101 | def TRGetData(): 102 | response = requests.request("GET", metaDictionary['Tripura'].url) 103 | soup = BeautifulSoup(response.content, 'html.parser') 104 | table = soup.find("tbody").find_all("tr") 105 | 106 | districtArray = [] 107 | for index, row in enumerate(table): 108 | dataPoints = row.find_all("td") 109 | 110 | districtDictionary = {} 111 | districtDictionary['districtName'] = dataPoints[1].get_text().strip() 112 | districtDictionary['confirmed'] = int(dataPoints[8].get_text().strip()) 113 | districtDictionary['recovered'] = int(dataPoints[10].get_text().strip()) 114 | districtDictionary['deceased'] = int(dataPoints[12].get_text().strip()) 115 | districtArray.append(districtDictionary) 116 | 117 | deltaCalculator.getStateDataFromSite("Tripura", districtArray, option) 118 | ``` 119 | 120 | 121 | # How does this code sit in the grand scheme of automation at covid19india.org? 122 | botto.png![image](https://user-images.githubusercontent.com/63364562/130059654-f5257e6a-6ed3-412b-b728-641d39794203.png) 123 | Essentially, the idea is that volunteers send the request over a telegram bot that is then [configured to trigger the script](https://github.com/covid19india/automation-bot/blob/master/src/ocr_functions.py) when a command is required. 
124 | -------------------------------------------------------------------------------- /automation/ocr/googlevision.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cv2 3 | import os 4 | import sys 5 | import json 6 | from PIL import Image 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | import matplotlib.patches as patches 10 | from matplotlib.patches import Circle 11 | 12 | dataDictionary = {} 13 | dataDictionaryArray = [] 14 | translationDictionary = {} 15 | xInterval = 0 16 | xStartThreshold = 0 17 | yStartThreshold = 0 18 | xEndThreshold = 0 19 | yEndThreshold = 0 20 | configxInterval = 0 21 | configyInterval = 0 22 | yInterval = 0 23 | startingText = "" 24 | endingText = "" 25 | enableTranslation = False 26 | translationFile = "" 27 | fileName = "" 28 | xWidthTotal = 0 29 | configMinLineLength = 600 30 | 31 | def is_number(s): 32 | try: 33 | int(s) 34 | return True 35 | except ValueError: 36 | return False 37 | 38 | class cellItem: 39 | def __init__(self, value, x, y, lbx, lby, w, h, col, row, index): 40 | self.value = value 41 | self.x = x 42 | self.y = y 43 | self.col = col 44 | self.row = row 45 | self.index = index 46 | self.lbx = lbx 47 | self.lby = lby 48 | self.h = h 49 | self.w = w 50 | 51 | class ColumnHandler: 52 | def __init__(self): 53 | self.columnList = [] 54 | self.rowList = [] 55 | self.pointList = [] 56 | 57 | def addPoint(self, x, y): 58 | self.pointList.append(LinePoints(x, y)) 59 | 60 | def prepareRow(self): 61 | rowNumber = 1 62 | self.pointList.sort(key=lambda y: y.y) 63 | for index, col in enumerate(self.pointList): 64 | if index % 2 == 1: 65 | continue 66 | if index == 0: 67 | previousX = col.x 68 | previousY = col.y 69 | continue 70 | 71 | if col.y - previousY < 10: 72 | continue 73 | self.rowList.append(ColumnAndRow(previousX, previousY, col.x, col.y, rowNumber)) 74 | previousX = col.x 75 | previousY = col.y 76 | rowNumber += 1 77 | 78 | def 
class ColumnHandler:
    """Groups Hough-line endpoints into table column and row bands.

    addPoint() collects line-segment endpoints (added in pairs, one per end);
    prepareColumn()/prepareRow() sort them by x resp. y and pair consecutive
    even-indexed points into ColumnAndRow bands, which later map a text's
    midpoint to a column/row number.
    """

    def __init__(self):
        self.columnList = []
        self.rowList = []
        self.pointList = []

    def addPoint(self, x, y):
        # Endpoints arrive in pairs: both ends of each detected line segment.
        self.pointList.append(LinePoints(x, y))

    def prepareRow(self):
        """Build horizontal bands from the collected points, top to bottom.

        Odd indices are skipped because points come in pairs; points closer
        than 10px vertically are treated as the same grid line.
        """
        rowNumber = 1
        self.pointList.sort(key=lambda p: p.y)
        for index, point in enumerate(self.pointList):
            if index % 2 == 1:
                continue
            if index == 0:
                previousX = point.x
                previousY = point.y
                continue

            if point.y - previousY < 10:
                continue
            self.rowList.append(ColumnAndRow(previousX, previousY, point.x, point.y, rowNumber))
            previousX = point.x
            previousY = point.y
            rowNumber += 1

    def prepareColumn(self):
        """Build vertical bands from the collected points, left to right.

        Mirror of prepareRow() with a 5px tolerance on x.
        """
        columnNumber = 1
        self.pointList.sort(key=lambda p: p.x)
        for index, point in enumerate(self.pointList):
            if index % 2 == 1:
                continue
            if index == 0:
                previousX = point.x
                previousY = point.y
                continue

            if point.x - previousX < 5:
                continue
            self.columnList.append(ColumnAndRow(previousX, previousY, point.x, point.y, columnNumber))
            previousX = point.x
            previousY = point.y
            columnNumber += 1

    def printColumnsAndCoordinates(self):
        """Debug dump of the detected column/row bands."""
        print("Column No ... x1,y1 --> x2,y2")
        for column in self.columnList:
            print("c{} ... {},{} --> {},{}".format(column.number, column.x1, column.y1, column.x2, column.y2))
        for row in self.rowList:
            print("r{} ... {},{} --> {},{}".format(row.number, row.x1, row.y1, row.x2, row.y2))

    def getNearestLineToTheLeft(self, xCoordinate):
        """Return the x of the column boundary just left of xCoordinate, or 0."""
        for col in self.columnList:
            if xCoordinate > int(col.x1) and xCoordinate < int(col.x2):
                return col.x1
        return 0

    def getColumnNumber(self, cell):
        """Return the column band number containing cell.x.

        Returns None when the midpoint falls outside every band (e.g. exactly
        on a boundary) — callers treat that cell as column-less.
        """
        for col in self.columnList:
            if cell.x > col.x1 and cell.x < col.x2:
                return col.number
        return None


class ColumnAndRow:
    """A band between two grid lines: (x1,y1)-(x2,y2) with a sequence number."""

    def __init__(self, x1, y1, x2, y2, number):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        self.number = number


class LinePoints:
    """A single 2-D point (one endpoint of a detected line segment)."""

    def __init__(self, x, y):
        self.x = x
        self.y = y


def buildCellsV2():
    """No-op placeholder (its body was commented out); kept because main()
    still calls it, preserving call-site compatibility."""
    global xInterval
    global yInterval
    global startingText
    global endingText
    global yStartThreshold
    global xStartThreshold
    global configxInterval
    global configyInterval
    global xWidthTotal


def detectLines():
    """Detect table grid lines via Canny + probabilistic Hough transform and
    feed their endpoints into the module-level ColumnHandler.

    Reads the image at the module-level ``fileName``. The original also
    computed a grayscale conversion that was never used (Canny runs directly
    on the BGR image); that dead work is removed.
    NOTE(review): theta step np.pi/135 is unusual (np.pi/180 is the standard
    1-degree step) — kept as-is since changing it alters detection; confirm intent.
    """
    global columnHandler
    global configMinLineLength
    img = cv2.imread(fileName)
    edges = cv2.Canny(img, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 135, configMinLineLength, maxLineGap=250)
    columnHandler = ColumnHandler()
    for line in lines:
        x1, y1, x2, y2 = line[0]
        columnHandler.addPoint(x1, y1)
        columnHandler.addPoint(x2, y2)

    columnHandler.prepareColumn()
    columnHandler.prepareRow()
    columnHandler.printColumnsAndCoordinates()
def buildCells():
    """Parse Google Vision bounds.txt into module-level cellItem objects.

    Each usable line has the shape
        text|bounds|llx,lly|lrx,lry|urx,ury|ulx,uly
    For every text the bounding-box midpoint is computed; the largest
    half-width/half-height seen become the x/y matching tolerances. "auto"
    starting/ending text resolves to the top-most / bottom-most word matching
    a district name in translationDictionary; explicit starting/ending texts
    are matched by value (comma-separated aliases come from
    buildTranslationDictionary()).
    """
    global xInterval
    global yInterval
    global startingText
    global endingText
    global xStartThreshold
    global yStartThreshold
    global xEndThreshold
    global yEndThreshold
    global configxInterval
    global configyInterval
    global xWidthTotal

    startingMatchFound = False
    endingMatchFound = False

    autoEndingText = endingText
    autoStartingText = startingText

    with open("bounds.txt", "r") as testingNumbersFile:
        for index, line in enumerate(testingNumbersFile):
            lineArray = line.split('|')
            if len(lineArray) != 6:
                continue

            if not lineArray[0] or not lineArray[2] or not lineArray[4] or not lineArray[5]:
                continue

            value = lineArray[0]

            lowerLeft = lineArray[2].split(',')
            lowerRight = lineArray[3].split(',')
            upperRight = lineArray[4].split(',')
            upperLeft = lineArray[5].split(',')

            if len(lowerLeft) != 2 or len(lowerRight) != 2 or len(upperRight) != 2 or len(upperLeft) != 2:
                continue

            # Midpoint of the bounding box around this text.
            xMean = (int(lowerLeft[0]) + int(lowerRight[0])) / 2
            yMean = (int(lowerLeft[1]) + int(upperLeft[1])) / 2

            if startingText == "auto":
                # Track the top-most word that looks like a district name.
                if len(value.title()) > 1 and any(value.title() in district for district in list(translationDictionary.keys())):
                    if xStartThreshold == 0:
                        xStartThreshold = xMean
                        autoStartingText = value
                    if yStartThreshold == 0:
                        yStartThreshold = yMean

                    if yMean < yStartThreshold:
                        # BUGFIX: was "xStartTreshold = xMean" (typo) — the
                        # assignment created a dead local, so the x threshold
                        # never followed the top-most district match.
                        xStartThreshold = xMean
                        yStartThreshold = yMean
                        autoStartingText = value

            if endingText == "auto":
                # Track the bottom-most word that looks like a district name.
                if len(value.title()) > 1 and any(value.title() in district for district in list(translationDictionary.keys())):
                    if xEndThreshold == 0:
                        xEndThreshold = xMean
                    if yEndThreshold == 0:
                        yEndThreshold = yMean
                        autoEndingText = value

                    if yMean > yEndThreshold:
                        xEndThreshold = xMean
                        yEndThreshold = yMean
                        autoEndingText = value

            if ',' in startingText:
                # Aliases (translated spellings) appended by buildTranslationDictionary().
                if value.title() in startingText.split(','):
                    startingMatchFound = True
                    xStartThreshold = xMean
                    yStartThreshold = yMean
            else:
                if value.title() == startingText and startingMatchFound == False:
                    startingMatchFound = True
                    xStartThreshold = xMean
                    yStartThreshold = yMean

            if ',' in endingText:
                if value.title() in endingText.split(','):
                    endingMatchFound = True
                    xEndThreshold = xMean
                    yEndThreshold = yMean
            else:
                if value.title() == endingText and endingMatchFound == False:
                    endingMatchFound = True
                    xEndThreshold = xMean
                    yEndThreshold = yMean

            # Keep the largest half-extent seen so far as the matching tolerance.
            xInterval = (int(lowerRight[0]) - int(lowerLeft[0])) / 2 if (int(lowerRight[0]) - int(lowerLeft[0])) / 2 > xInterval else xInterval
            yInterval = (int(upperLeft[1]) - int(lowerLeft[1])) / 2 if (int(upperLeft[1]) - int(lowerLeft[1])) / 2 > yInterval else yInterval
            xWidthTotal = xWidthTotal + int(lowerRight[0]) - int(lowerLeft[0])
            dataDictionaryArray.append(cellItem(value, xMean, yMean, lowerLeft[0], lowerLeft[1], (float(lowerRight[0]) - float(lowerLeft[0])), (float(upperLeft[1]) - float(lowerLeft[1])), 0, 0, index + 1))

    # NOTE(review): raises ZeroDivisionError when bounds.txt yields no usable
    # rows — assumed to be intentional fail-fast; confirm.
    xWidthTotal = xWidthTotal / len(dataDictionaryArray)
    startingText = autoStartingText
    endingText = autoEndingText


def buildReducedArray():
    """Drop texts above / left of the detected table region and recompute the
    x/y tolerances from the widest and tallest surviving cell."""
    global endingText
    global xInterval
    global yInterval
    global dataDictionaryArray
    global columnHandler

    tempDictionaryArray = []
    maxWidth = 0
    maxHeight = 0

    # Ignoring texts left of / above the starting text improves output accuracy.
    print("Starting text: {} ... Ending text: {}".format(startingText, endingText))
    xLimit = columnHandler.getNearestLineToTheLeft(xStartThreshold) if houghTransform == True else xStartThreshold - 20
    for cell in dataDictionaryArray:
        if cell.y < yStartThreshold - 10 or (xLimit is not None and cell.x < xLimit):
            continue

        if len(endingText) != 0 and (cell.y > yEndThreshold + 10):
            continue

        tempDictionaryArray.append(cell)
        maxWidth = cell.w if cell.w > maxWidth else maxWidth
        maxHeight = cell.h if cell.h > maxHeight else maxHeight

    xInterval = maxWidth / 2
    yInterval = maxHeight / 2

    dataDictionaryArray = tempDictionaryArray


def assignRowsAndColumns():
    """Assign row/col numbers to every cell.

    Cells whose midpoints coincide within the x/y tolerance share a column/row;
    with Hough enabled, column numbers come from the detected grid lines.
    """
    global yInterval
    global xInterval
    global configyInterval
    global configxInterval

    # Explicit config overrides win over computed tolerances.
    if configxInterval != 0:
        xInterval = configxInterval
    if configyInterval != 0:
        yInterval = configyInterval

    print("Using computed yInterval: {}, xInterval: {}".format(yInterval, xInterval))
    for rowIndex, currentCell in enumerate(dataDictionaryArray):
        if currentCell.row == 0:
            currentCell.row = rowIndex + 1

        # Hoisted out of the inner loop (it was loop-invariant in the original).
        if currentCell.col == 0:
            if houghTransform == True:
                currentCell.col = columnHandler.getColumnNumber(currentCell)
            else:
                # Without grid lines each unassigned cell seeds its own column id.
                currentCell.col = rowIndex + 1

        for restOfTheCells in dataDictionaryArray:
            if restOfTheCells.index == currentCell.index:
                continue

            yUpperBound = currentCell.y + yInterval
            yLowerBound = currentCell.y - yInterval
            # Same y coordinate (within tolerance) -> same row.
            if restOfTheCells.row == 0:
                if yLowerBound <= restOfTheCells.y <= yUpperBound:
                    restOfTheCells.row = rowIndex + 1

            xUpperBound = currentCell.x + xInterval
            xLowerBound = currentCell.x - xInterval

            # Same x coordinate (within tolerance) -> same column.
            if restOfTheCells.col == 0:
                if houghTransform == True:
                    restOfTheCells.col = columnHandler.getColumnNumber(restOfTheCells)
                elif xLowerBound <= restOfTheCells.x <= xUpperBound:
                    restOfTheCells.col = currentCell.col
def buildTranslationDictionary():
    """Load the district translation meta file into translationDictionary.

    Each non-comment line is "localName,englishName". If the configured
    starting/ending text equals an English name, its local-language alias is
    appended comma-separated so buildCells() can match either spelling.
    """
    global startingText
    global endingText

    originalStartingText = startingText
    originalEndingText = endingText

    with open(translationFile, "r") as metaFile:
        for line in metaFile:
            if line.startswith('#'):
                continue
            lineArray = line.strip().split(',')
            if len(startingText) != 0:
                if originalStartingText.strip() == lineArray[1].strip():
                    startingText = startingText + "," + lineArray[0].strip()

            if len(endingText) != 0:
                if originalEndingText.strip() == lineArray[1].strip():
                    endingText = endingText + "," + lineArray[0].strip()

            translationDictionary[lineArray[0].strip()] = lineArray[1].strip()


def printOutput():
    """Emit the reconstructed table to output.txt (one CSV row per table row)
    and save/show a debug image with rectangles around every used text.

    Texts sharing a row number are sorted by x; adjacent non-numeric texts in
    the same column are merged (multi-word district names). With translation
    enabled, the district name is mapped through translationDictionary (with a
    fuzzy fallback); unmapped rows are dropped with a warning.
    """
    global enableTranslation

    image = np.array(Image.open(fileName), dtype=np.uint8)
    fig, ax = plt.subplots(1)
    if houghTransform == True:
        for point in columnHandler.pointList:
            # Highlight endpoints on the column boundary nearest the start text.
            if columnHandler.getNearestLineToTheLeft(xStartThreshold) - 5 <= point.x <= columnHandler.getNearestLineToTheLeft(xStartThreshold) + 5:
                circ = Circle((point.x, point.y), 5, color='r')
            else:
                circ = Circle((point.x, point.y), 4)
            ax.add_patch(circ)

    with open('output.txt', 'w') as outputFile:
        # Row numbers are assigned 1..len(dataDictionaryArray); the original
        # iterated range(0, len(...)) which could skip the highest row number
        # (and wasted an iteration on the impossible row 0).
        for i in range(1, len(dataDictionaryArray) + 1):
            outputString = []
            for cell in dataDictionaryArray:
                if cell.row == i:
                    outputString.append(cell)
            outputString.sort(key=lambda c: c.x)

            output = ""
            previousCol = -999
            mergedValue = ""
            columnList = ""
            # Merge texts that share a column value due to proximity (district
            # names split by spaces) while walking the row left to right.
            for index, value in enumerate(outputString):
                value.value = re.sub(r"\.", "", re.sub(",", "", value.value))
                if index == 0:
                    mergedValue = value.value
                    previousCol = value.col
                    columnList = str(value.col)
                    rect = patches.Rectangle((int(value.lbx), int(value.lby)), value.w, value.h, linewidth=0.75, edgecolor='r', facecolor='none')
                    ax.add_patch(rect)
                    continue

                if value.col == previousCol and is_number(value.value) == False:
                    mergedValue = mergedValue + " " + value.value if len(mergedValue) != 0 else value.value
                    if index == len(outputString) - 1:
                        output += mergedValue if len(output) == 0 else " , " + mergedValue
                else:
                    if index == len(outputString) - 1:
                        mergedValue = mergedValue + ", " + value.value if len(mergedValue) != 0 else value.value
                        output += mergedValue if len(output) == 0 else " , " + mergedValue
                    previousCol = value.col
                    mergedValue = value.value
                    columnList = columnList + ", " + str(value.col) if len(columnList) != 0 else str(value.col)
                    rect = patches.Rectangle((int(value.lbx), int(value.lby)), value.w, value.h, linewidth=0.75, edgecolor='r', facecolor='none')
                    ax.add_patch(rect)

            if len(output) > 0:
                if enableTranslation == False:
                    print("{} | {}".format(output, columnList), file=outputFile)
                else:
                    outputArray = output.split(',')
                    # If the first column is a serial number the district name
                    # sits in column 1, otherwise in column 0.
                    if is_number(outputArray[0]):
                        districtName = outputArray[1].strip()
                        districtIndex = 1  # BUGFIX: was misspelled "distrinctIndex"
                    else:
                        districtName = outputArray[0].strip()
                        districtIndex = 0  # BUGFIX: was misspelled "distrinctIndex"

                    # Look up the district; fall back to fuzzy match, else drop
                    # the record with a message.
                    try:
                        translatedValue = translationDictionary[districtName]
                        outputString = translatedValue
                        for index, value in enumerate(outputArray):
                            if index > districtIndex:
                                outputString += "," + value.strip()
                    except KeyError:
                        try:
                            fuzzyDistrict = fuzzyLookup(translationDictionary, districtName)
                            translatedValue = translationDictionary[fuzzyDistrict]
                        except:
                            print(f"Failed to find lookup for {districtName}")
                            continue

                        outputString = translatedValue
                        for index, value in enumerate(outputArray):
                            if index > districtIndex:
                                outputString += "," + value.strip()
                    print("{} | {}".format(outputString, columnList), file=outputFile)

    ax.imshow(image)
    plt.savefig("image.png", dpi=300)
    plt.show()


def fuzzyLookup(translationDictionary, districtName):
    """Map a misread district name to the closest dictionary key via fuzzy match.

    NOTE: process.extractOne returns None when no key reaches the cutoff, so
    the [0] subscript raises TypeError — the caller treats any exception from
    here as "not found".
    """
    from fuzzywuzzy import process
    # Score cut-off of 90 seems to work well for UP bulletins.
    district = process.extractOne(
        districtName,
        translationDictionary.keys(),
        score_cutoff=90)[0]
    print(f"WARN : {districtName} mapped to {district} using Fuzzy Lookup")
    return district
def parseConfigFile(fileName):
    """Load ocrconfig.meta-style "key:value" settings into module globals.

    Recognised keys: startingText (optionally "start,end"), enableTranslation,
    translationFile, xInterval, yInterval, houghTransform, configMinLineLength.
    Lines without a ':' and unknown keys are ignored.
    """
    global startingText
    global endingText
    global enableTranslation
    global translationFile
    global configyInterval
    global configxInterval
    global houghTransform
    global configMinLineLength

    # "with" fixes the file-handle leak in the original (handle never closed).
    with open(fileName, "r") as configFile:
        for line in configFile:
            # NOTE: split(':') keeps only the first segment after the key, so a
            # value containing ':' would be truncated — none of the known keys do.
            lineArray = line.split(':')
            if len(lineArray) < 2:
                continue

            key = lineArray[0].strip()
            value = lineArray[1].strip()

            if key == "startingText":
                if ',' in value:
                    startingText = value.split(',')[0]
                    endingText = value.split(',')[1]
                else:
                    startingText = value
            elif key == "enableTranslation":
                # Replaces eval(value): only the documented "True" literal
                # enables the flag; eval on config text was unsafe.
                enableTranslation = value == "True"
            elif key == "translationFile":
                translationFile = value
            elif key == "xInterval":
                configxInterval = int(value)
            elif key == "yInterval":
                configyInterval = int(value)
            elif key == "houghTransform":
                houghTransform = value == "True"
            elif key == "configMinLineLength":
                # Replaces eval(value) with an explicit integer parse.
                configMinLineLength = int(value)


def main():
    """CLI entry point: argv[1] = config file, argv[2] = bulletin image."""
    global startingText
    global endingText
    global enableTranslation
    global houghTransform
    global fileName

    # Default; the config file may override it.
    houghTransform = False
    if len(sys.argv) > 1:
        parseConfigFile(sys.argv[1])
        # NOTE(review): raises IndexError when only the config path is given —
        # assumed callers (ocr.sh) always pass both arguments; confirm.
        fileName = sys.argv[2]

    buildTranslationDictionary()

    buildCells()
    buildCellsV2()
    if houghTransform == True:
        print("Using houghTransform to figure out columns. Set houghTransform:False in ocrconfig.meta.orig to disable this")
        detectLines()

    if len(startingText) != 0 or len(endingText) != 0:
        buildReducedArray()

    assignRowsAndColumns()

    printOutput()


if __name__ == '__main__':
    main()
Set houghTransform:False in ocrconfig.meta.orig to disable this") 526 | detectLines() 527 | 528 | if len(startingText) != 0 or len(endingText) != 0: 529 | buildReducedArray() 530 | 531 | assignRowsAndColumns() 532 | 533 | printOutput() 534 | if __name__ == '__main__': 535 | main() 536 | -------------------------------------------------------------------------------- /extract.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import requests 3 | import sys 4 | import json 5 | import re 6 | import datetime 7 | folderName = "data/" + sys.argv[1] + "/" 8 | 9 | #url = "https://dashboard.kerala.gov.in/index.php" 10 | #response = requests.request("GET", url) 11 | #cookie=(response.headers['Set-Cookie']).split(';')[0] 12 | # 13 | #url = "https://dashboard.kerala.gov.in/testing-view-public.php" 14 | # 15 | #payload = {} 16 | #headers = { 17 | # 'Host': 'www.dashboard.kerala.gov.in', 18 | # 'Connection': 'keep-alive', 19 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 20 | # 'Accept-Encoding': 'gzip, deflate, br', 21 | # 'Accept-Language': 'en-US,en;q=0.5', 22 | # 'Referer': 'https://dashboard.kerala.gov.in/index.php', 23 | # 'Cookie': cookie 24 | #} 25 | # 26 | #response = requests.request("GET", url, headers=headers, data = payload) 27 | #soup = BeautifulSoup(response.content, 'html5lib') 28 | #table = soup.find("table") 29 | #rows=table.find_all("tr") 30 | # 31 | #for row in rows: 32 | # data = row.find_all("td") 33 | # if len(data) > 0 : 34 | # print(data[0].get_text() + "," + data[1].get_text() + "," + data[2].get_text()) 35 | # 36 | #url = "http://hmfw.ap.gov.in/covid_dashboard.aspx" 37 | #response = requests.request("GET", url) 38 | #soup = BeautifulSoup(response.content, 'html5lib') 39 | #samplesTested = soup.find("span", id="lblSamples") 40 | #samplesNegative = soup.find("span", id="lblNegative") 41 | # 42 | #print("AP: Samples tested: " + 
metaArray = []


class ExtractMeta:
    """One row of extract.meta: a state dashboard URL plus whether
    district-level extraction is required (third CSV field non-empty)."""

    def __init__(self, url, stateName, district):
        # Any non-empty district field switches on district-level extraction;
        # bool("") is False, matching the original ==""/else logic exactly.
        self.districtRequired = bool(district)
        self.url = url
        self.stateName = stateName


# Module-level side effect (kept for backward compatibility): extract.meta is
# parsed at import time into metaArray. Lines starting with '#' are comments.
with open("extract.meta", "r") as metaFile:
    for line in metaFile:
        if line.startswith('#'):
            continue
        lineArray = line.strip().split(',')
        if len(lineArray) < 3:
            # Robustness: blank or malformed rows previously raised IndexError.
            continue
        metaArray.append(ExtractMeta(lineArray[0].strip(), lineArray[1].strip(), lineArray[2].strip()))


def getDataForStates():
    """Dispatch every metaArray entry to the district- or state-level extractor
    and write the accumulated state-level rows to data/<date>/summary.csv."""
    outputToWrite = []
    # BUGFIX: "Discharged" was previously misspelled "Dischargedl" in the header.
    header = "State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged, url\n"
    outputToWrite.append(header)

    for metaObject in metaArray:
        if metaObject.districtRequired:
            districtDetailsExtractor(metaObject)
        else:
            stateDetailsExtractor(metaObject, outputToWrite)

    writeToOutputCsv("summary.csv", outputToWrite)


def writeToOutputCsv(fileName, dataToWrite):
    """Write the accumulated CSV lines under the per-date output folder
    (module-level folderName, derived from argv[1])."""
    # Context manager guarantees the handle is closed even on write errors.
    with open(folderName + fileName, "w") as outFile:
        outFile.writelines(dataToWrite)
testingNumbersFile.writelines(dataToWrite) 100 | testingNumbersFile.close() 101 | 102 | 103 | def stateDetailsExtractor(metaObject, outputString): 104 | print("Data fetching for " + metaObject.stateName + ", " + metaObject.url) 105 | url = metaObject.url 106 | try: 107 | response = requests.request("GET", url) 108 | except: 109 | print("Error occurred while doing a request to " + url) 110 | return False 111 | soup = BeautifulSoup(response.content, 'html5lib') 112 | 113 | if metaObject.stateName == "Andhra Pradesh": 114 | samplesTested = soup.find("span", id = "lblSamples").get_text() 115 | samplesNegative = soup.find("span", id = "lblNegative").get_text() 116 | confirmed = soup.find("span", id = "lblConfirmed").get_text() 117 | active = soup.find("span", id = "lblActive").get_text() 118 | discharged = soup.find("span", id = "lblDischarged").get_text() 119 | lastUpdated = datetime.datetime.strptime(soup.find("span", id = "lblLast_Update").get_text(), "%d-%m-%Y %I:%M:%S %p") 120 | outputString.append("Andhra Pradesh, " + lastUpdated.strftime("%d/%m/%Y") + ", " + samplesTested + ", " + confirmed +","+ samplesNegative + ",,"+ confirmed +","+ active +", "+ discharged + "," + url + "\n") 121 | 122 | if metaObject.stateName == "Arunachal Pradesh": 123 | row = soup.find("tbody").find("tr") 124 | for index, data in enumerate(row.find_all("td")): 125 | if index == 0: 126 | lastUpdated = data.get_text() 127 | if index == 1: 128 | samplesTested = data.get_text() 129 | if index == 4: 130 | samplesNegative = data.get_text() 131 | if index == 5: 132 | samplesPositive = data.get_text() 133 | if index == 3: 134 | resultsAwaited = data.get_text() 135 | if index == 7: 136 | active = data.get_text() 137 | if index == 6: 138 | cured = data.get_text() 139 | 140 | outputString.append("Arunachal Pradesh, " + lastUpdated + ", " + samplesTested + ", " + samplesPositive + ", " + samplesNegative +","+ resultsAwaited +"," + samplesPositive + "," + active +"," + cured + "," + url + "\n") 141 | 
142 | if metaObject.stateName == "Chandigarh": 143 | divs = soup.find("div", {"class": "col-lg-8 col-md-9 form-group pt-10"}).find_all("div", {"class": "col-md-3"}) 144 | 145 | dataDictionary = {} 146 | for div in divs: 147 | innerDiv = div.find("div", {'class': 'stats'}).find_all('div') 148 | dataDictionary[innerDiv[0].get_text()] = innerDiv[1].get_text() 149 | 150 | rowString = "Chandigarh, " + datetime.date.today().strftime("%d/%m/%Y") 151 | orderArray = ['Total Sampled', 'Confirmed', 'Negative Cases', 'Result Awaited', 'Confirmed', '', 'Recovered'] 152 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 153 | 154 | outputString.append(rowString) 155 | 156 | if metaObject.stateName == "Gujarat": 157 | divs = soup.find_all("div", {"class": "dashboard-status"}) 158 | date = soup.find("span", id="ctl00_body_lblDate").get_text() 159 | dataDictionary = {} 160 | 161 | for div in divs: 162 | value = div.find("h3") 163 | key = div.find_all("h5") 164 | dataDictionary[key[len(key)-1].get_text().strip()] = value.get_text() 165 | 166 | rowString = "Gujarat, " + date 167 | orderArray = ['Cases Tested for COVID19', '', '', '', '', '', 'Patients Recovered', 'People Under Quarantine'] 168 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 169 | outputString.append(rowString) 170 | 171 | if metaObject.stateName == "Kerala": 172 | table = soup.find('table', {"class": "table-bordered"}).find_all("tr") 173 | date = soup.find("small").get_text() 174 | dataDictionary = {} 175 | keys = table[0].find_all("td") 176 | values = table[1].find_all("td") 177 | for index, value in enumerate(values): 178 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 179 | 180 | keys = soup.find('section', {"class": "content"}).find("div", {"class": "container-fluid"}).find("div", {"class": "row"}).find_all("p") 181 | values = soup.find('section', {"class": "content"}).find("div", {"class": "container-fluid"}).find("div", {"class": 
"row"}).find_all("h3") 182 | 183 | for index, value in enumerate(values): 184 | if '(' in keys[index].get_text().strip(): 185 | key = keys[index].get_text().strip().split('(')[0] 186 | else: 187 | key = keys[index].get_text().strip() 188 | dataDictionary[key] = value.get_text().strip() 189 | 190 | rowString = "Kerala, " + date 191 | orderArray = ['Total Sent', 'Tested Positive', 'Tested Negative', 'Result Awaiting', 'Total Confirmed', 'Active Cases ', 'Recovered '] 192 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 193 | outputString.append(rowString) 194 | 195 | if metaObject.stateName == "Nagaland": 196 | keys = soup.find("div", {"class": "row"}).find_all('p') 197 | values = soup.find("div", {"class": "row"}).find_all(['h1', 'h3']) 198 | 199 | dataDictionary = {} 200 | for index, value in enumerate(values): 201 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 202 | 203 | print(dataDictionary) 204 | 205 | rowString = "Nagaland, " + datetime.date.today().strftime("%d/%m/%Y") 206 | orderArray = ['', '', '', '', 'CONFIRMED', 'ACTIVE', 'RECOVERED'] 207 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 208 | 209 | outputString.append(rowString) 210 | 211 | if metaObject.stateName == "Odisha": 212 | divs = soup.find_all("div", {"class": "info-box"}) 213 | date = soup.find("div", {"class": "toplink-section d-flex justify-content-center align-items-center"}).find("small").get_text() 214 | dataDictionary = {} 215 | for div in divs: 216 | key = re.sub(' +', '', div.find("p").get_text().strip()) 217 | value = re.sub(',', '', re.sub(' +\[.*', '', div.find("h5").get_text().strip())) 218 | dataDictionary[key] = value 219 | 220 | rowString = "Odisha, " + date 221 | orderArray = ['TotalTestsDone', 'PositiveResult', 'NegativeResult', '', 'Confirmed', 'Active', 'Recovered'] 222 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 223 | outputString.append(rowString) 224 | 225 | header = 
"State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged\n" 226 | if metaObject.stateName == "Puducherry": 227 | divRows = soup.find_all("div", {"class": "row"}) 228 | divs = divRows[0].find_all("div", {"class": "card-body"}) 229 | date = soup.find("footer").find("div", {"class": "col-6 text-left"}).get_text() 230 | dataDictionary = {} 231 | for div in divs: 232 | value = div.find("span").get_text() 233 | div.find("span").decompose() 234 | key = div.get_text().split('(')[0].strip() if '(' in div.get_text() else div.get_text() 235 | dataDictionary[key] = value 236 | 237 | divs = divRows[2].find_all("div", {"class": "col-xl-6"})[1].find("table") 238 | keys = divs.find_all("th") 239 | values = divs.find_all("td") 240 | 241 | for index, value in enumerate(values): 242 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 243 | 244 | 245 | rowString = "Puducherry, " + date 246 | 247 | order = ['Total Samples Sent', 'Total Positive', 'Total Negative', 'Result Awaiting', 'Total Reported', 'Active Case', 'Cured'] 248 | rowString = buildRowString(url, order, rowString, dataDictionary) 249 | outputString.append(rowString) 250 | 251 | header = "State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged\n" 252 | if metaObject.stateName == "Rajasthan": 253 | table = soup.find("table").find_all("tr")[1].find_all("table")[3].find_all("div") 254 | date = re.sub(' +', ' ', re.sub('\n', ' ', soup.find("table").find_all("tr")[1].find_all("div", {"align": "right"})[1].get_text().strip())) 255 | keys = [] 256 | values = [] 257 | dataDictionary = {} 258 | for index, row in enumerate(table): 259 | if row.find("br") != None: 260 | row.find("br").decompose() 261 | valuesArray = re.sub(' +', '', row.get_text().strip()).split('\n') 262 | value = valuesArray.pop(0) 263 | dataDictionary[' '.join(valuesArray)] = 
value 264 | 265 | rowString = "Rajasthan, " + date 266 | orderArray = ['TotalSample Collected', 'Positive Cases', 'Negative Cases', 'Report Awaited', 'Positive Cases', '', 'Cured/Recovered'] 267 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 268 | outputString.append(rowString) 269 | 270 | 271 | 272 | 273 | def buildRowString(url, orderArray, rowString, dataDictionary): 274 | 275 | for key in orderArray: 276 | rowString = rowString + "," + dataDictionary[key] if len(key) > 0 else rowString + "," 277 | rowString += "," + url + "\n" 278 | return rowString 279 | 280 | 281 | 282 | def nagalandTableExtractor(soupObject, districtDictionary, firstPass): 283 | for index, row in enumerate(soupObject): 284 | if index == 0: 285 | dataElements = row.find_all("th") 286 | else: 287 | dataElements = row.find_all("td") 288 | 289 | rowString="" 290 | currentDistrict = "" 291 | for data in dataElements: 292 | if len(rowString) == 0: 293 | currentDistrict = data.get_text() 294 | rowString = data.get_text() if len(rowString) == 0 else rowString + "," + data.get_text() 295 | 296 | rowString = rowString.replace('-', '/') 297 | if firstPass == False: 298 | rowString = rowString + "\n" 299 | districtDictionary[currentDistrict] = rowString if firstPass == True else districtDictionary[currentDistrict] + "," + rowString 300 | 301 | def readAllEntriesForATable(table, outputString, itemToSearch, itemsToAppend, itemsToRemove): 302 | for index, row in enumerate(table): 303 | data = row.find_all(itemToSearch) 304 | 305 | if len(itemsToRemove) != 0: 306 | for sub in data('font'): 307 | sub.decompose() 308 | 309 | rowString = "" 310 | for value in data: 311 | rowString = str(value.get_text()).strip() if len(rowString) == 0 else rowString + "," + str(value.getText()).strip() 312 | rowString = re.sub('\n', '', rowString) 313 | rowString = re.sub(' +', ' ', rowString) 314 | 315 | if len(rowString) > 0: 316 | rowString = rowString + "," + str(itemsToAppend) + "\n" if 
len(itemsToAppend) > 0 else rowString + "\n" 317 | outputString.append(rowString) 318 | 319 | 320 | 321 | def districtDetailsExtractor(metaObject): 322 | print("Data fetching for " + metaObject.stateName + ", " + metaObject.url) 323 | outputString = [] 324 | url = metaObject.url 325 | 326 | try: 327 | response = requests.request("GET", url) 328 | except: 329 | print("Error occurred while doing a request to " + url) 330 | return False 331 | 332 | soup = BeautifulSoup(response.content, 'html5lib') 333 | 334 | if metaObject.stateName == "Nagaland": 335 | table = soup.find("table").find_all("tr") 336 | districtDictionary = {} 337 | nagalandTableExtractor(table, districtDictionary, True) 338 | 339 | response = requests.request("GET", "https://covid19.nagaland.gov.in/") 340 | soup = BeautifulSoup(response.content, 'html5lib') 341 | table = soup.find("div", id="case-data").find("table").find_all("tr") 342 | nagalandTableExtractor(table, districtDictionary, False) 343 | 344 | outputString.append(districtDictionary['District']) 345 | for k, v in districtDictionary.items(): 346 | if str(k) != 'District': 347 | outputString.append(str(v)) 348 | outputString.append("\n" + url) 349 | 350 | writeToOutputCsv("nagalandDistrict.csv", outputString) 351 | 352 | if metaObject.stateName == 'Odisha': 353 | url = "https://statedashboard.odisha.gov.in/ajax/heatMapHospital?type=Current" 354 | try: 355 | response = requests.request("GET", url) 356 | except: 357 | print("Error occurred while doing a request to " + url) 358 | return False 359 | outputString.append("DistrictName,NoOfHospitals,NoOfBeds,NoOfICU\n") 360 | for data in response.json(): 361 | dataString = data['vchDistrctName'] + "," + str(data['intNoOfHospital']) + "," + str(data['intNoOfBed']) + "," + str(data['intNoOfICU']) + "\n" 362 | outputString.append(dataString) 363 | outputString.append(url) 364 | writeToOutputCsv("OdishaDistrictBeds.csv", outputString) 365 | 366 | 367 | if metaObject.stateName == 'Puducherry': 368 | div = 
soup.find_all("div", {"class": "col-md-6"}) 369 | date = div[1].find("h5").get_text().replace('-', '/') 370 | table = div[1].find("table").find_all("tr") 371 | 372 | readAllEntriesForATable(table, outputString, "th", date, '') 373 | readAllEntriesForATable(table, outputString, "td", date, '') 374 | 375 | outputString.append(url) 376 | writeToOutputCsv("Puducherry.csv", outputString) 377 | 378 | 379 | if metaObject.stateName == "Gujarat": 380 | div = soup.find("div", {"class": "card-body p-1"}) 381 | date = soup.find("span", id="ctl00_body_lblDate").get_text() 382 | table = div.find("table").find_all("tr") 383 | 384 | tempOutputString = [] 385 | readAllEntriesForATable(table, tempOutputString, "th", 'Last Updated', '') 386 | 387 | 388 | table = div.find("table").find_all("tr") 389 | readAllEntriesForATable(table, tempOutputString, "span", date, '') 390 | 391 | districtNames = [] 392 | 393 | for row in table: 394 | data = row.find("td") 395 | if data is not None: 396 | districtNames.append(data.get_text().strip()) 397 | 398 | for index, value in enumerate(tempOutputString): 399 | if index == 0: 400 | outputString.append(value) 401 | else: 402 | districtString = districtNames[index - 1] + "," + value 403 | outputString.append(districtString) 404 | 405 | outputString.append(url) 406 | writeToOutputCsv("GujaratDistrict.csv", outputString) 407 | 408 | if metaObject.stateName == 'Andhra Pradesh': 409 | response = requests.request("POST", url).json() 410 | try: 411 | response = requests.request("POST", url).json() 412 | except: 413 | print("Error occurred while doing a request to " + url) 414 | return False 415 | districtDictionary = {} 416 | 417 | districtDictionary['District'] = "Cases,Active,Recovered,Death,Total Samples,Total Positive,Total Negative,Total Inprogress, Total, Beds, Hall, Rooms" 418 | for cases in (response['cases_district']): 419 | districtDictionary[cases['district_name']] = cases['cases'] +","+ cases['active'] +","+ cases['recovered'] +","+ 
cases['death'] 420 | 421 | for cases in (response['samples_district']): 422 | districtDictionary[cases['district_name']] = districtDictionary[cases['district_name']] + "," + cases['total'] +","+ cases['positive'] +","+ cases['negitive'] +","+ cases['inprogress'] 423 | 424 | for cases in (response['infra_district']): 425 | districtDictionary[cases['district_name']] = districtDictionary[cases['district_name']] + "," + cases['total'] +","+ cases['beds'] +","+ cases['hall'] +","+ cases['rooms'] 426 | 427 | 428 | for k, v in districtDictionary.items(): 429 | outputString.append(str(k) + "," + str(v) + "\n") 430 | 431 | outputString.append(url) 432 | writeToOutputCsv("APDistrict.csv", outputString) 433 | 434 | if metaObject.stateName == 'Rajasthan': 435 | table = soup.find('blockquote').find('table').find_all('tr') 436 | 437 | tempOutputString = [] 438 | readAllEntriesForATable(table, tempOutputString, "font", '', '') 439 | 440 | 441 | for index, row in enumerate(tempOutputString): 442 | if 'Discharged' in row: 443 | row = "SR. 
No, District - Country,Total Sample Received,Todays Positive,Cumulative Positive,Recovered,Discharged\n" 444 | if 'Other District' in row: 445 | row = "," + row 446 | if 'Total,' in row: 447 | rowValue = row.split(',') 448 | rowString = ""; 449 | for headerIndex, data in enumerate(rowValue): 450 | if headerIndex%2 == 0: 451 | rowString = rowString + "," + data 452 | row = rowString + "\n" 453 | if 'Grand Total' in row: 454 | row = "," + row 455 | if 'BSF' in row: 456 | row = ",," + row 457 | if 'Evacuees' in row: 458 | row = ",," 459 | if 'Italy' in row: 460 | row = ",," 461 | 462 | outputString.append(row) 463 | 464 | outputString.append(url) 465 | writeToOutputCsv("Rajasthan.csv", outputString) 466 | 467 | getDataForStates() 468 | 469 | 470 | 471 | 472 | -------------------------------------------------------------------------------- /automation/x: -------------------------------------------------------------------------------- 1 | Using pageId: 2 | [[{'id': '5f395a2f0deffa1bd752be5b', 'tagId': '5dd152552fc63e490ca55adb', 'header': False, 'name': 'East Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 73617, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 36632, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 36632, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 915}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 1253}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 35267}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 37435}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', 
'5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5d', 'tagId': '5dd152552fc63e490ca55add', 'header': False, 'name': 'RiBhoi', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 17995, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 9119, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 9119, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 76}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 249}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 
8797}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 9122}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5c', 'tagId': '5dd152552fc63e490ca55ae1', 'header': False, 'name': 'West Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 
'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 17628, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 8852, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 8852, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 71}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 78}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 8704}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 8853}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': 
'608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5f', 'tagId': '5dd152552fc63e490ca55ae2', 'header': False, 'name': 'West Jaintia Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 12075, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 6133, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 6133, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 115}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 189}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 5828}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 6132}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': 
'5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be62', 'tagId': '5dd152552fc63e490ca55ae3', 'header': False, 'name': 'West Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 7295, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 3802, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 3802, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 57}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 309}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 3436}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 3802}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': 
'5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5e', 'tagId': '5dd152552fc63e490ca55ada', 'header': False, 'name': 'East Jaintia Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 4627, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2336, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2336, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 30}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 45}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 2261}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 2336}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': 
'5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be66', 'tagId': '5dd152552fc63e490ca55ae0', 'header': False, 'name': 'South West Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 4480, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2300, 'meta': None}, {'headerId': 'ID', 'Operator': 
'COUNT_DISTINCT', 'value': 2300, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 14}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 120}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 2166}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 2300}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': 
'60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be60', 'tagId': '5dd152552fc63e490ca55adf', 'header': False, 'name': 'South West Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3663, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1841, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1841, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 16}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 19}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1806}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1841}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': 
'5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be65', 'tagId': '5dd152552fc63e490ca55ad9', 'header': False, 'name': 'East Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3618, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1820, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1820, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 9}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 22}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1789}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1820}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', 
'5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be61', 'tagId': '5dd152552fc63e490ca55adc', 'header': False, 'name': 'North Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3309, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1658, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1658, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 8}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 7}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1643}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1658}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': 
'5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be63', 'tagId': '5dd152552fc63e490ca55ade', 'header': False, 'name': 'South Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 2406, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1205, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1205, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 12}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 4}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1189}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1205}], 'mappedDbs': 
{'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}]] 3 | 4 | ********************Meghalaya******************** 5 | East Khasi Hills,Meghalaya,ML,2309,Recovered 6 | Ribhoi,Meghalaya,ML,669,Recovered 7 | West Garo Hills,Meghalaya,ML,976,Recovered 8 | West Jaintia Hills,Meghalaya,ML,562,Recovered 9 | West Khasi 
Hills,Meghalaya,ML,480,Recovered 10 | East Jaintia Hills,Meghalaya,ML,179,Recovered 11 | South West Khasi Hills,Meghalaya,ML,313,Recovered 12 | South West Garo Hills,Meghalaya,ML,77,Recovered 13 | East Garo Hills,Meghalaya,ML,98,Recovered 14 | North Garo Hills,Meghalaya,ML,117,Recovered 15 | South Garo Hills,Meghalaya,ML,91,Recovered 16 | East Khasi Hills,Meghalaya,ML,814,Deceased 17 | Ribhoi,Meghalaya,ML,61,Deceased 18 | West Garo Hills,Meghalaya,ML,64,Deceased 19 | West Jaintia Hills,Meghalaya,ML,103,Deceased 20 | West Khasi Hills,Meghalaya,ML,36,Deceased 21 | East Jaintia Hills,Meghalaya,ML,25,Deceased 22 | South West Khasi Hills,Meghalaya,ML,12,Deceased 23 | South West Garo Hills,Meghalaya,ML,15,Deceased 24 | East Garo Hills,Meghalaya,ML,9,Deceased 25 | North Garo Hills,Meghalaya,ML,8,Deceased 26 | South Garo Hills,Meghalaya,ML,10,Deceased 27 | StateTotal, 76504, 72886, 1323 28 | SiteTotal, 76504, 67015, 166 29 | Dashboard url: https://services7.arcgis.com/nzBTI19PTHBZaEPT/arcgis/rest/services/Admin_Boundary/FeatureServer/1/query?f=json&returnGeometry=false&outFields=*&where=1=1 30 | -------------------------------------------------------------------------------- /automation/automation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import datetime 3 | import csv 4 | import requests 5 | import json 6 | import pdftotext 7 | import sys 8 | import os 9 | import re 10 | import logging 11 | import camelot 12 | from bs4 import BeautifulSoup 13 | import html5lib 14 | from deltaCalculator import DeltaCalculator 15 | 16 | 17 | ''' 18 | To add a new state: 19 | 20 | Make an entry into automation.meta file. 21 | Write a function GetData() 22 | Inside this function fetch/read files and prepare an array of hashes. 
23 | Each hash should be of the format: 24 | { 25 | "districtName": nameOfTheDistrict, 26 | "confirmed": TotalConfirmedCount, 27 | "recovered": TotalRecoveredCount, 28 | "deceased": TotalDeceasedCount 29 | } 30 | In case any of the values is unknown, pass -999 as the value. All keys are mandatory. 31 | 32 | Pass these values to the deltaCalculator.getStateDataFromSite function with the state name. 33 | Eg: deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option). The value for options are: full/detailed/. These values are passed via command line. 34 | 35 | The deltaCalculator object will return the valules to be added for today for the three categories across all districts mentioned. 36 | 37 | In case there are name mappings required, i.e, if the district name in the bulletin and the district name in the site are different, make entries in nameMapping.meta file. 38 | This file has , , as the format for each line. 39 | 40 | For any pdf reading, refer to readFileFromURLV2 function. This needs to be called from within the GetData() function. 41 | ''' 42 | 43 | 44 | 45 | logging.basicConfig(filename='deltaCalculator.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) 46 | deltaCalculator = DeltaCalculator() 47 | metaDictionary = {} 48 | option = "" 49 | typeOfAutomation = "url" 50 | pdfUrl = "" 51 | pageId = "" 52 | 53 | ''' This class holds the data from automation.meta file. 
class AutomationMeta:
    """One row of automation.meta: a state's name, code and dashboard URL."""

    def __init__(self, stateName, stateCode, url):
        self.stateName = stateName    # full state name, e.g. "Andhra Pradesh"
        self.stateCode = stateCode    # short code used to build the <code>GetData() entry point name
        self.url = url                # dashboard / bulletin URL for the state


def fetchData(stateName):
    """Run the scraper for one state, or for every state that has a URL configured.

    Per-state entry points follow the naming convention <stateCode>GetData(),
    so the call is assembled from the meta entry and executed.
    NOTE(review): eval() on meta-file content executes arbitrary code if
    automation.meta is ever tampered with; the file is treated as trusted here.
    """
    if stateName == "All States":
        for key, metaObject in metaDictionary.items():
            if len(metaObject.url.strip()) > 0:
                logging.info("Calling delta calculator for: " + metaObject.stateCode)
                eval(metaObject.stateCode + "GetData()")
                print("Dashboard url: " + metaObject.url)
    else:
        try:
            logging.info("Calling delta calculator for: " + metaDictionary[stateName].stateCode)
            eval(metaDictionary[stateName].stateCode + "GetData()")
            print("Dashboard url: " + metaDictionary[stateName].url)
        except KeyError:
            print("No entry found for state {} in automation.meta file".format(stateName))


def loadMetaData():
    """Populate metaDictionary from automation.meta (CSV: name,code,url; '#' starts a comment)."""
    with open("automation.meta", "r") as metaFile:
        for line in metaFile:
            if line.startswith('#'):
                continue
            lineArray = line.strip().split(',')
            metaObject = AutomationMeta(lineArray[0].strip(), lineArray[1].strip(), lineArray[2].strip())
            metaDictionary[lineArray[0].strip()] = metaObject
    # BUGFIX: dropped the redundant metaFile.close() -- the with-block already closes the file.


'''
def getAllColumnValues():
    columnSet = set()
    with open(".tmp/ct.txt", "r") as upFile:
        for line in upFile:
            for col in line.split('|')[1].split(','):
                columnSet.add(re.sub('\n', '', col.strip()))
    return sorted(columnSet)
'''


def CTGetData():
    """Chhattisgarh: parse .tmp/ct.txt OCR output ('values|column-markers' lines)."""
    districtArray = []
    '''columnNumbers = getAllColumnValues()'''
    with open(".tmp/ct.txt", "r") as upFile:
        for line in upFile:
            linesArray = line.split('|')[0].split(',')
            availableColumns = line.split('|')[1].split(',')

            districtDictionary = {}
            districtDictionary['deceased'] = 0
            confirmedFound = False
            recoveredFound = False
            deceasedFound = False
            # Column markers ("2", "4", "9", "12") identify which OCR column
            # holds which metric; deceased may appear more than once, hence +=.
            for index, data in enumerate(linesArray):
                if availableColumns[index].strip() == "2":
                    districtDictionary['districtName'] = data.strip()
                if availableColumns[index].strip() == "4":
                    districtDictionary['confirmed'] = int(data.strip())
                    confirmedFound = True
                if availableColumns[index].strip() == "9":
                    districtDictionary['recovered'] = int(data.strip())
                    recoveredFound = True
                if availableColumns[index].strip() == "12":
                    districtDictionary['deceased'] += int(data.strip())
                    deceasedFound = True

            if recoveredFound == False or confirmedFound == False:
                print("--> Issue with {}".format(linesArray))
                continue
            districtArray.append(districtDictionary)
    # BUGFIX: dropped the redundant upFile.close() inside the with-block.

    deltaCalculator.getStateDataFromSite("Chhattisgarh", districtArray, option)


def APGetData():
    """Andhra Pradesh: dispatch on the automation mode chosen on the command line."""
    if typeOfAutomation == "ocr":
        APGetDataByOCR()
    elif typeOfAutomation == "pdf":
        APGetDataByPdf()
    else:
        APGetDataByUrl()


def APGetDataByPdf():
    """Andhra Pradesh via PDF: convert the bulletin, then parse .tmp/ap.csv.

    Expected CSV columns: district, confirmed, recovered, deceased.
    """
    linesArray = []
    districtDictionary = {}
    districtArray = []
    readFileFromURLV2(metaDictionary['Andhra Pradesh'].url, "Andhra Pradesh", "Anantapur", "")
    try:
        with open(".tmp/ap.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                # Last column may be blank (trailing newline only) -> treat as 0.
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub('\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)
    except FileNotFoundError:
        print("ap.csv missing. Generate through pdf or ocr and rerun.")
def APGetDataByOCR():
    """Andhra Pradesh via OCR: parse .tmp/ap.txt ('fields|markers' lines).

    Expected fields per row: district, _, confirmed, _, recovered, deceased.
    """
    districtArray = []
    with open(".tmp/ap.txt", "r") as upFile:
        for line in upFile:
            if 'Total' in line:
                continue

            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 6:
                print("--> Issue with {}".format(linesArray))
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[2])
            districtDictionary['recovered'] = int(linesArray[4])
            # BUGFIX: the emptiness guard previously checked linesArray[3]
            # while converting linesArray[5]; inspect the field actually used.
            districtDictionary['deceased'] = int(linesArray[5]) if len(re.sub('\n', '', linesArray[5])) != 0 else 0
            districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)


def ARGetDataByOcr():
    """Arunachal Pradesh via OCR: parse .tmp/ar.txt.

    'Capital Complex' rows are folded into 'Papum Pare' before the totals are
    handed to the delta calculator.
    """
    districtArray = []
    additionalDistrictInfo = {}
    additionalDistrictInfo['districtName'] = 'Papum Pare'
    additionalDistrictInfo['confirmed'] = 0
    additionalDistrictInfo['recovered'] = 0
    additionalDistrictInfo['deceased'] = 0

    with open(".tmp/ar.txt", "r") as upFile:
        for line in upFile:
            if 'Total' in line:
                continue

            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 14:
                print("--> Issue with {}".format(linesArray))
                continue

            if linesArray[0].strip() == "Capital Complex" or linesArray[0].strip() == "Papum Pare":
                additionalDistrictInfo['confirmed'] += int(linesArray[5])
                additionalDistrictInfo['recovered'] += int(linesArray[12])
                additionalDistrictInfo['deceased'] += int(linesArray[13]) if len(re.sub('\n', '', linesArray[13])) != 0 else 0
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[5])
            districtDictionary['recovered'] = int(linesArray[12])
            # Last column may carry only the trailing newline -> treat as 0.
            districtDictionary['deceased'] = int(linesArray[13]) if len(re.sub('\n', '', linesArray[13])) != 0 else 0
            districtArray.append(districtDictionary)
    districtArray.append(additionalDistrictInfo)

    deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option)


def ARGetData():
    """Arunachal Pradesh: OCR input when requested, dashboard JSON otherwise."""
    if typeOfAutomation == "ocr":
        ARGetDataByOcr()
        return
    stateDashboard = requests.request("get", metaDictionary['Arunachal Pradesh'].url).json()
    districtArray = []
    for districtDetails in stateDashboard:
        if districtDetails['district'] == 'Total':
            continue
        districtDictionary = {}
        districtDictionary['districtName'] = districtDetails['district']
        districtDictionary['confirmed'] = int(districtDetails['confirmed'])
        districtDictionary['recovered'] = int(districtDetails['recovered'])
        districtDictionary['deceased'] = int(districtDetails['deceased'])

        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option)


def APGetDataByUrl():
    """Andhra Pradesh via HTML: scrape the second table of the dashboard page."""
    response = requests.request("GET", metaDictionary['Andhra Pradesh'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find_all("table")[1].find_all("tr")

    districtArray = []
    for index, row in enumerate(table):
        data = row.find_all("td")
        # Skip the header row and the grand-total row.
        if 'Total' in data[0].get_text() or 'District' in data[0].get_text():
            continue

        districtDictionary = {}
        districtDictionary['districtName'] = data[0].get_text()
        districtDictionary['confirmed'] = int(data[1].get_text())
        districtDictionary['recovered'] = int(data[2].get_text())
        districtDictionary['deceased'] = int(data[3].get_text())
        districtArray.append(districtDictionary)

    """
    stateDashboard = requests.request("post", metaDictionary['Andhra Pradesh'].url).json()

    for districtDetails in (stateDashboard['cases_district']):
        districtDictionary = {}
        districtDictionary['districtName'] = districtDetails['district_name']
        districtDictionary['confirmed'] = int(districtDetails['cases'])
        districtDictionary['recovered'] = int(districtDetails['recovered'])
        districtDictionary['deceased'] = int(districtDetails['death'])

        districtArray.append(districtDictionary)
    """
    deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)


def ORGetData():
    """Odisha: extract the dashboard's embedded JSON via curl/sed and parse it.

    The dashboard embeds its data as a JSON.stringify(...) call inside a script
    tag; the shell pipeline strips the JS wrapper and keeps the first match.
    """
    os.system("curl -sk https://statedashboard.odisha.gov.in/ | grep -i string | grep -v legend | sed 's/var result = JSON.stringify(//' |sed 's/);//' | head -1 > orsite.csv")

    districtArray = []
    districtsData = []
    with open("orsite.csv", "r") as metaFile:
        for line in metaFile:
            districtsData = json.loads(line)
            for data in districtsData:
                districtDictionary = {}
                districtDictionary['districtName'] = data['vchDistrictName']
                districtDictionary['confirmed'] = int(data['intConfirmed'])
                districtDictionary['recovered'] = int(data['intRecovered'])
                # State reports "other" deaths separately; fold them in.
                districtDictionary['deceased'] = int(data['intDeceased']) + int(data['intOthDeceased'])
                districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Odisha", districtArray, option)


def MHGetData():
    """Maharashtra: OCR input when requested, dashboard JSON otherwise."""
    if typeOfAutomation == "ocr":
        MHGetDataByOcr()
    else:
        MHGetDataByUrl()
"1" 307 | 308 | readFileFromURLV2(metaDictionary['VCMohfw'].url + fileName, "VCMohfw", "A & N Islands", "") 309 | dadra = {'firstDose': 0, 'secondDose': 0, 'totalDose': 0} 310 | 311 | try: 312 | with open(".tmp/vcm.csv", "r") as upFile: 313 | for line in upFile: 314 | if "Dadra" in line or "Daman" in line: 315 | dadra['firstDose'] += int(line.split(',')[1]) 316 | dadra['secondDose'] += int(line.split(',')[2]) 317 | dadra['totalDose'] += int(line.split(',')[3]) 318 | continue 319 | print(today + "," + line, end = "") 320 | 321 | print("{}, DnH, {}, {}, {}".format(today, dadra['firstDose'], dadra['secondDose'], dadra['totalDose'])) 322 | except FileNotFoundError: 323 | print("br.txt missing. Generate through pdf or ocr and rerun.") 324 | 325 | def VCGetData(): 326 | today = (datetime.date.today() - datetime.timedelta(days = 1)).strftime("%Y-%m-%d") 327 | #proxy = {"https":"http://159.65.153.14:8080"} 328 | #vaccineDashboardNation = requests.request("get", "https://api.cowin.gov.in/api/v1/reports/getPublicReports?state_id=&district_id=&date=2021-03-01").json() 329 | stateKeys = { 330 | '36': 'West Bengal', 331 | '7': 'Chhattisgarh', 332 | '31': 'Tamil Nadu', 333 | '20': 'Madhya Pradesh', 334 | '13': 'Himachal Pradesh', 335 | '4': 'Assam', 336 | '15': 'Jharkhand', 337 | '11': 'Gujarat', 338 | '28': 'Punjab', 339 | '17': 'Kerala', 340 | '32': 'Telangana', 341 | '33': 'Tripura', 342 | '10': 'Goa', 343 | '14': 'Jammu and Kashmir', 344 | '34': 'Uttar Pradesh', 345 | '29': 'Rajasthan', 346 | '5': 'Bihar', 347 | '21': 'Maharashtra', 348 | '2': 'Andhra Pradesh', 349 | '16': 'Karnataka', 350 | '35': 'Uttarakhand', 351 | '26': 'Odisha', 352 | '12': 'Haryana', 353 | '3': 'Arunachal Pradesh', 354 | '9': 'Delhi', 355 | '1': 'Andaman and Nicobar Islands', 356 | '24': 'Mizoram', 357 | '23': 'Meghalaya', 358 | '27': 'Puducherry', 359 | '18': 'Ladakh', 360 | '30': 'Sikkim', 361 | '25': 'Nagaland', 362 | '37': 'Daman and Diu', 363 | '22': 'Manipur', 364 | '39': 'Himachal', 365 | '6': 
'Chandigarh', 366 | '8': 'Dadra and Nagar Haveli', 367 | '19': 'Lakshadweep', 368 | '0': 'India' 369 | } 370 | 371 | lookback = int(pageId) if len(pageId) != 0 else 0 372 | lookbackMaxDate = datetime.date(2021, 5, 21) 373 | if datetime.date.today() - datetime.timedelta(days = lookback) < lookbackMaxDate: 374 | lookback = (datetime.date.today() - lookbackMaxDate).days 375 | print("------------ Data beyond 21st May has different data ranges hence defaulting max lookback to max {} days--------- ".format(lookback)) 376 | print("date, state, district, daily vaccine count, beneficiaries, sessions, sites, vaccines given, vaccines given dose two, male, female, others, covaxin, covishield, sputnik, aefi, 18-45, 45-60, 60+") 377 | for day in range (lookback, -1, -1): 378 | today = (datetime.date.today() - datetime.timedelta(days = day)).strftime("%Y-%m-%d") 379 | todayStr = (datetime.date.today() - datetime.timedelta(days = day)).strftime("%d-%m-%Y") 380 | if option == "V2": 381 | metaDictionary['Vaccine'].url = "https://api.cowin.gov.in/api/v1/reports/v2/getPublicReports?state_id=@@state_id@@&district_id=@@district_id@@&date=@@date@@" 382 | url = re.sub('@@date@@', today, metaDictionary['Vaccine'].url) 383 | url_nation = re.sub('@@district_id@@', '', re.sub('@@state_id@@', '', url)) 384 | 385 | districtCount = 1 386 | 387 | if option == "V2": 388 | districtArray = getAndPrintVaccineDataV2(url_nation, '0', todayStr, stateKeys, '') 389 | else: 390 | districtArray = getAndPrintVaccineDataV1(url_nation, '0', todayStr, stateKeys, '') 391 | 392 | 393 | for state_code in range(1, 38, 1): 394 | url_state = re.sub('@@district_id@@', '', re.sub('@@state_id@@', str(state_code), url)) 395 | districtArray = [] 396 | 397 | if option == "V2": 398 | districtArray = getAndPrintVaccineDataV2(url_state, state_code, todayStr, stateKeys, '') 399 | else: 400 | districtArray = getAndPrintVaccineDataV1(url_state, state_code, todayStr, stateKeys, '') 401 | 402 | if not districtArray: 403 | continue 
404 | for district in districtArray: 405 | url_district = re.sub('@@district_id@@', str(district['district_id']), re.sub('@@state_id@@', str(state_code), url)) 406 | if option == "V2": 407 | getAndPrintVaccineDataV2(url_district, state_code, todayStr, stateKeys, district['district_name']) 408 | else: 409 | getAndPrintVaccineDataV1(url_district, state_code, todayStr, stateKeys, district['district_name']) 410 | 411 | 412 | 413 | def getAndPrintVaccineDataV1(url, state_code, todayStr, stateKeys, districtName): 414 | 415 | vaccineDashboard = requests.request("get", url) 416 | if vaccineDashboard.status_code != 200: 417 | while True: 418 | vaccineDashboard = requests.request("get", url) 419 | if vaccineDashboard.status_code == 200: 420 | break 421 | vaccineDashboard = vaccineDashboard.json() 422 | if not vaccineDashboard: 423 | return 424 | gender = {'male': 0, 'female': 0, 'others': 0} 425 | #print(vaccineDashboard) 426 | for i in range (0, 3, 1): 427 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'male': 428 | gender['male'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 429 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'female': 430 | gender['female'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 431 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'others': 432 | gender['others'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 433 | 434 | typeOfVaccine = {'covaxin': 0, 'covishield': 0} 435 | for i in range (0, 2, 1): 436 | if vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['material_name'].lower() == 'covaxin': 437 | typeOfVaccine['covaxin'] = vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['count'] 438 | if vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['material_name'].lower() == 'covishield': 439 | typeOfVaccine['covishield'] = 
vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['count'] 440 | 441 | print("{}, {}, '{}', {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 442 | format(todayStr, \ 443 | stateKeys[str(state_code)], \ 444 | districtName, \ 445 | vaccineDashboard['dailyVaccineData']['vaccine_given'], \ 446 | vaccineDashboard['overAllReports']['Beneficiaries'], \ 447 | vaccineDashboard['overAllReports']['Sessions'], \ 448 | vaccineDashboard['overAllReports']['Sites'], \ 449 | vaccineDashboard['overAllReports']['Vaccine Given'], \ 450 | vaccineDashboard['overAllReports']['Vaccine Given Dose Two'], \ 451 | gender['male'], \ 452 | gender['female'], \ 453 | gender['others'], \ 454 | typeOfVaccine['covaxin'], \ 455 | typeOfVaccine['covishield'] 456 | )) 457 | with open('output.out','a') as file: 458 | print("{}, {}, '{}', {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 459 | format(todayStr, \ 460 | stateKeys[str(state_code)], \ 461 | districtName, \ 462 | vaccineDashboard['dailyVaccineData']['vaccine_given'], \ 463 | vaccineDashboard['overAllReports']['Beneficiaries'], \ 464 | vaccineDashboard['overAllReports']['Sessions'], \ 465 | vaccineDashboard['overAllReports']['Sites'], \ 466 | vaccineDashboard['overAllReports']['Vaccine Given'], \ 467 | vaccineDashboard['overAllReports']['Vaccine Given Dose Two'], \ 468 | gender['male'], \ 469 | gender['female'], \ 470 | gender['others'], \ 471 | typeOfVaccine['covaxin'], \ 472 | typeOfVaccine['covishield'] 473 | ), file = file) 474 | return vaccineDashboard['getBeneficiariesGroupBy'] 475 | 476 | def getAndPrintVaccineDataV2(url, state_code, todayStr, stateKeys, districtName): 477 | vaccineDashboard = requests.request("get", url) 478 | if vaccineDashboard.status_code != 200: 479 | while True: 480 | vaccineDashboard = requests.request("get", url) 481 | if vaccineDashboard.status_code == 200: 482 | break 483 | vaccineDashboard = vaccineDashboard.json() 484 | if not vaccineDashboard: 485 | return 486 | 487 | 488 | category = 
vaccineDashboard['topBlock']['vaccination'] 489 | if 'vaccinationByAge' in vaccineDashboard.keys(): 490 | category = vaccineDashboard['vaccinationByAge'] 491 | 492 | print("{}, {}, \"{}\", {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 493 | format(todayStr, \ 494 | stateKeys[str(state_code)], \ 495 | districtName, \ 496 | vaccineDashboard['topBlock']['vaccination']['today'], \ 497 | vaccineDashboard['topBlock']['vaccination']['total'], \ 498 | vaccineDashboard['topBlock']['sessions']['total'], \ 499 | vaccineDashboard['topBlock']['sites']['total'], \ 500 | vaccineDashboard['topBlock']['vaccination']['tot_dose_1'], \ 501 | vaccineDashboard['topBlock']['vaccination']['tot_dose_2'], \ 502 | vaccineDashboard['topBlock']['vaccination']['male'], \ 503 | vaccineDashboard['topBlock']['vaccination']['female'], \ 504 | vaccineDashboard['topBlock']['vaccination']['others'], \ 505 | vaccineDashboard['topBlock']['vaccination']['covaxin'], \ 506 | vaccineDashboard['topBlock']['vaccination']['covishield'], \ 507 | vaccineDashboard['topBlock']['vaccination']['sputnik'], \ 508 | vaccineDashboard['topBlock']['vaccination']['aefi'], \ 509 | category['vac_18_45'], \ 510 | category['vac_45_60'], \ 511 | category['above_60'] 512 | ) 513 | ) 514 | 515 | 516 | with open('output2.out','a') as file: 517 | print("{}, {}, \"{}\", {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". 
\ 518 | format(todayStr, \ 519 | stateKeys[str(state_code)], \ 520 | districtName, \ 521 | vaccineDashboard['topBlock']['vaccination']['today'], \ 522 | vaccineDashboard['topBlock']['vaccination']['total'], \ 523 | vaccineDashboard['topBlock']['sessions']['total'], \ 524 | vaccineDashboard['topBlock']['sites']['total'], \ 525 | vaccineDashboard['topBlock']['vaccination']['tot_dose_1'], \ 526 | vaccineDashboard['topBlock']['vaccination']['tot_dose_2'], \ 527 | vaccineDashboard['topBlock']['vaccination']['male'], \ 528 | vaccineDashboard['topBlock']['vaccination']['female'], \ 529 | vaccineDashboard['topBlock']['vaccination']['others'], \ 530 | vaccineDashboard['topBlock']['vaccination']['covaxin'], \ 531 | vaccineDashboard['topBlock']['vaccination']['covishield'], \ 532 | ), file = file) 533 | return vaccineDashboard['getBeneficiariesGroupBy'] 534 | 535 | 536 | def MHGetDataByOcr(): 537 | linesArray = [] 538 | districtDictionary = {} 539 | districtArray = [] 540 | try: 541 | with open(".tmp/mh.txt", "r") as upFile: 542 | isIgnoreFlagSet = False 543 | for line in upFile: 544 | linesArray = line.split('|')[0].split(',') 545 | if 'Total' in line or isIgnoreFlagSet == True: 546 | isIgnoreFlagSet = True 547 | print("--> Ignoring {} ".format(line)) 548 | if len(linesArray) != 6: 549 | print("--> Ignoring due to invalid length: {}".format(linesArray)) 550 | continue 551 | districtDictionary = {} 552 | try: 553 | if is_number(linesArray[0].strip()): 554 | print("--> Ignoring: {}".format(linesArray)) 555 | continue 556 | 557 | districtDictionary['districtName'] = linesArray[0].strip().title() 558 | districtDictionary['confirmed'] = int(linesArray[1]) 559 | districtDictionary['recovered'] = int(linesArray[2]) 560 | districtDictionary['deceased'] = int(linesArray[3]) 561 | districtDictionary['migrated'] = int(linesArray[4]) 562 | districtArray.append(districtDictionary) 563 | except ValueError: 564 | print("--> Ignoring: {}".format(linesArray)) 565 | continue 566 | 567 | 
def MHGetDataByUrl():
    """Pull Maharashtra district counts from the state dashboard JSON feed."""
    stateDashboard = requests.request("get", metaDictionary['Maharashtra'].url).json()

    districtArray = []
    for districtDetails in stateDashboard:
        districtArray.append({
            'districtName': districtDetails['District'],
            'confirmed': districtDetails['Positive Cases'],
            'recovered': districtDetails['Recovered'],
            'deceased': districtDetails['Deceased'],
        })

    deltaCalculator.getStateDataFromSite("Maharashtra", districtArray, option)


def HPGetData():
    """Parse OCR output in .tmp/hp.txt into Himachal Pradesh district counts.

    Expects 11 comma-separated columns: col 0 = district, col 1 = confirmed,
    col 8 = recovered, col 9 = deceased (col 10 = migrated, unused).
    """
    districtArray = []
    try:
        with open(".tmp/hp.txt", "r") as upFile:
            for line in upFile:
                # Strip OCR asterisk noise before splitting.
                line = re.sub(r'\*', '', line)
                linesArray = line.split('|')[0].split(',')

                if len(linesArray) != 11:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1].strip())
                districtDictionary['recovered'] = int(linesArray[8].strip())
                # Asterisks were stripped above already; keep the defensive sub.
                districtDictionary['deceased'] = int(re.sub(r'\*', '', linesArray[9].strip()).strip())
                #districtDictionary['migrated'] = int(linesArray[10].strip())

                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Himachal Pradesh", districtArray, option)
    except FileNotFoundError:
        print("hp.txt missing. Generate through pdf or ocr and rerun.")


def RJGetDataUsingUrl():
    """Deprecated HTML scrape of the Rajasthan dashboard.

    NOTE(review): the guard below is always True (no value equals both
    "ocr" and "pdf"), so this function always exits at the print.  Kept
    as-is because the method is declared deprecated; the author probably
    meant `and` if it must ever run again.
    """
    if typeOfAutomation != "ocr" or typeOfAutomation != "pdf":
        print("RJ Getdata using url is deprecated")
        return
    response = requests.request("GET", metaDictionary['Rajasthan'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('blockquote').find('table').find_all('tr')

    districtArray = []
    for index, rowContent in enumerate(table):
        dataPoints = rowContent.find_all("td")
        # Skip the header row and anything that is not a 7-column data row.
        if index == 0 or len(dataPoints) != 7:
            continue
        districtName = re.sub(' +', ' ', re.sub(r'\n', ' ', dataPoints[1].get_text().strip()))

        districtDictionary = {}
        districtDictionary['districtName'] = districtName
        districtDictionary['confirmed'] = int(dataPoints[4].get_text().strip())
        districtDictionary['recovered'] = int(dataPoints[5].get_text().strip())
        districtDictionary['deceased'] = -999   # deaths not published on this page
        districtArray.append(districtDictionary)

    print(districtArray)
    deltaCalculator.getStateDataFromSite("Rajasthan", districtArray, option)


def GJGetData():
    """Scrape the Gujarat dashboard HTML table into district counts."""
    response = requests.request("GET", metaDictionary['Gujarat'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("div", {"class": "table-responsive"}).find_all("tr")

    districtArray = []
    for index, row in enumerate(table):
        # Last row is the state total.
        if index == len(table) - 1:
            continue

        dataPoints = row.find_all("td")
        if len(dataPoints) != 6:
            continue

        districtDictionary = {}
        districtDictionary['districtName'] = dataPoints[0].get_text()
        districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip())
        districtDictionary['recovered'] = int(dataPoints[3].get_text().strip())
        districtDictionary['deceased'] = int(dataPoints[5].get_text().strip())
        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Gujarat", districtArray, option)
def TGGetData():
    """Print Telangana per-district confirmed counts from OCR output.

    GHMC (Greater Hyderabad) is reported under district 'Hyderabad'.
    """
    with open(".tmp/tg.txt", "r") as tgFile:
        for line in tgFile:
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 2:
                print("--> Issue with {}".format(linesArray))
                continue
            if linesArray[0].strip().capitalize() == "Ghmc":
                linesArray[0] = "Hyderabad"
            print("{},Telangana,TG,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[1].strip()))


def UPGetData():
    """Parse OCR output in .tmp/up.txt into Uttar Pradesh district counts.

    Two OCR layouts exist: "ocr1" (7 columns) and "ocr2" (8 columns).
    When more than 10 lines fail to parse, the other layout is tried by
    flipping typeOfAutomation and re-running this function once.
    """
    errorCount = 0
    districtArray = []
    lengthOfArray = 7
    activeIndex = 6
    recoveredIndex = 3
    deceasedIndex = 5
    global typeOfAutomation

    if typeOfAutomation == "ocr1":
        lengthOfArray = 7
        activeIndex = 6
        recoveredIndex = 3
        deceasedIndex = 5
    else:
        typeOfAutomation = "ocr2"
        lengthOfArray = 8
        activeIndex = 7
        recoveredIndex = 4
        deceasedIndex = 6
    print("--> Using format {}".format(typeOfAutomation))

    try:
        with open(".tmp/up.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')

                if errorCount > 10:
                    # Too many malformed lines: assume the other OCR layout
                    # and restart from scratch.
                    errorCount = 0
                    if typeOfAutomation == "ocr1":
                        typeOfAutomation = "ocr2"
                    else:
                        typeOfAutomation = "ocr1"
                    print("--> Switching to version {}. Error count breached.".format(typeOfAutomation))
                    UPGetData()
                    return

                if len(linesArray) != lengthOfArray:
                    print("--> Issue with {}".format(linesArray))
                    errorCount += 1
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                # The bulletin does not print cumulative confirmed; derive it
                # as recovered + deceased + active.
                districtDictionary['confirmed'] = int(linesArray[recoveredIndex]) + int(linesArray[deceasedIndex]) + int(linesArray[activeIndex])
                districtDictionary['recovered'] = int(linesArray[recoveredIndex])
                districtDictionary['deceased'] = int(linesArray[deceasedIndex])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Uttar Pradesh", districtArray, option)
    except FileNotFoundError:
        print("up.txt missing. Generate through pdf or ocr and rerun.")


def UTGetData():
    """Parse OCR output in .tmp/ut.txt into Uttarakhand district counts.

    Everything from the 'Total' row onward is a summary and is skipped.
    """
    districtArray = []
    ignoreLines = False
    try:
        with open(".tmp/ut.txt", "r") as upFile:
            for line in upFile:
                if ignoreLines == True:
                    continue

                if 'Total' in line:
                    ignoreLines = True
                    continue

                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 6:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[4])
                districtDictionary['migrated'] = int(linesArray[5])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Uttarakhand", districtArray, option)
    except FileNotFoundError:
        # Was "br.txt" — copy/paste slip; this reader uses ut.txt.
        print("ut.txt missing. Generate through pdf or ocr and rerun.")


def BRGetData():
    """Parse OCR output in .tmp/br.txt into Bihar district counts."""
    districtArray = []
    try:
        with open(".tmp/br.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0]
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Bihar", districtArray, option)
    except FileNotFoundError:
        print("br.txt missing. Generate through pdf or ocr and rerun.")
def JHGetData():
    """Dispatch Jharkhand extraction based on the automation mode."""
    if typeOfAutomation == "ocr":
        JHGetDataByOCR()
    else:
        JHGetDataByURL2()

def JHGetDataByURL():
    """Legacy Jharkhand fetch from the map-data endpoint.

    Superseded by JHGetDataByURL2 (which the dispatcher calls); kept for
    reference.
    """

    url = "https://covid19dashboard.jharkhand.gov.in/Home/mapData?action=\"total\"&district_id=0"

    payload="action=total&district_id=0"
    headers = {
        'Origin': 'https://covid19dashboard.jharkhand.gov.in',
        'Referer': 'https://covid19dashboard.jharkhand.gov.in/',
        'Host': 'covid19dashboard.jharkhand.gov.in',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Content-Length': '26',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Cookie': 'ci_session=9n007bqlfk2q8joa9igknjpq6vpl2gmm'
    }

    response = requests.request("POST", url, headers=headers, data=payload).json()

    districtArray = []
    for data in response:
        districtDictionary = {}
        districtDictionary['districtName'] = data['district']
        districtDictionary['confirmed'] = int(data['positive_cases'])
        districtDictionary['recovered'] = int(data['recovered_cases'])
        districtDictionary['deceased'] = int(data['deaths'])
        print(districtDictionary)
        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Jharkhand", districtArray, option)

def JHGetDataByURL2():
    """Scrape the Jharkhand bulletin table and print per-district deltas.

    Rows before the 'Bokaro' row are table headers and are skipped.
    Zero counts are suppressed from the output.
    """
    # NOTE(review): the query-string date looks stale; today's date is sent
    # in the POST payload below — presumably the server honours the payload.
    # Confirm before cleaning up the URL.
    url = "https://covid19dashboard.jharkhand.gov.in/Bulletin/GetTestCaseData?date=2021-03-25"

    payload="date=" + (datetime.date.today() - datetime.timedelta(days = 0)).strftime("%Y-%m-%d")
    headers = {
        'Host': 'covid19dashboard.jharkhand.gov.in',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # The original dict listed 'Accept' twice; Python keeps only the last
        # duplicate key, so this single entry is behavior-identical.
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Content-Length': '15',
        'Cookie': 'ci_session=i6qt39o41i7gsopt23ipm083hla6994c'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    soup = BeautifulSoup(response.content, 'html.parser')
    districts = soup.find("table").find_all("tr")

    districtStart = False
    for district in districts:

        if "Bokaro" in district.get_text() and districtStart == False:
            districtStart = True

        if districtStart == False:
            continue

        data = district.find_all("td")

        if int(data[3].get_text()) != 0:
            print("{},Jharkhand,JH,{},Hospitalized".format(data[1].get_text(), data[3].get_text()))
        if int(data[4].get_text()) != 0:
            print("{},Jharkhand,JH,{},Recovered".format(data[1].get_text(), data[4].get_text()))
        if int(data[6].get_text()) != 0:
            print("{},Jharkhand,JH,{},Deceased".format(data[1].get_text(), data[6].get_text()))


def JHGetDataByOCR():
    """Parse OCR output in .tmp/jh.txt; each metric is split over two
    columns that are summed per district."""
    districtArray = []
    try:
        with open(".tmp/jh.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 8:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[4]) + int(linesArray[5])
                districtDictionary['recovered'] = int(linesArray[2]) + int(linesArray[6])
                districtDictionary['deceased'] = int(linesArray[3]) + int(linesArray[7])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Jharkhand", districtArray, option)
    except FileNotFoundError:
        print("jh.txt missing. Generate through pdf or ocr and rerun.")

def RJGetData():
    """Parse OCR output in .tmp/rj.txt into Rajasthan district counts.

    Rows from 'Other' onward (other-state entries etc.) are skipped.
    """
    districtArray = []
    skipValues = False
    try:
        with open(".tmp/rj.txt", "r") as upFile:
            for line in upFile:
                if 'Other' in line:
                    skipValues = True
                    continue
                if skipValues == True:
                    continue

                linesArray = line.split('|')[0].split(',')

                if len(linesArray) != 9:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip().title()
                districtDictionary['confirmed'] = int(linesArray[3])
                districtDictionary['recovered'] = int(linesArray[7])
                districtDictionary['deceased'] = int(linesArray[5])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Rajasthan", districtArray, option)
    except FileNotFoundError:
        print("rj.txt missing. Generate through pdf or ocr and rerun.")
def MPGetData():
    """Parse OCR output in .tmp/mp.txt into Madhya Pradesh district counts.

    Everything from the 'Total' row onward is a summary and is skipped.
    """
    districtArray = []
    try:
        with open(".tmp/mp.txt", "r") as upFile:
            isIgnoreFlagSet = False
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if 'Total' in line or isIgnoreFlagSet == True:
                    isIgnoreFlagSet = True
                    print("--> Ignoring {} ".format(line))
                    # Fix: actually skip the line as announced; previously a
                    # well-formed row after 'Total' still fell through and
                    # could be appended.
                    continue
                if len(linesArray) != 8:
                    print("--> Ignoring due to invalid length: {}".format(linesArray))
                    continue
                districtDictionary = {}
                try:
                    if is_number(linesArray[0].strip()):
                        print("--> Ignoring: {}".format(linesArray))
                        continue

                    districtDictionary['districtName'] = linesArray[0].strip().title()
                    districtDictionary['confirmed'] = int(linesArray[2])
                    districtDictionary['recovered'] = int(linesArray[6])
                    districtDictionary['deceased'] = int(linesArray[4])
                    districtArray.append(districtDictionary)
                except ValueError:
                    print("--> Ignoring: {}".format(linesArray))
                    continue

        deltaCalculator.getStateDataFromSite("Madhya Pradesh", districtArray, option)
    except FileNotFoundError:
        # Was "rj.txt" — copy/paste slip; this reader uses mp.txt.
        print("mp.txt missing. Generate through pdf or ocr and rerun.")

def JKGetData():
    """Parse OCR output in .tmp/jk.txt into Jammu & Kashmir district counts."""
    districtArray = []
    try:
        with open(".tmp/jk.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 11:
                    print("--> Ignoring due to invalid length: {}".format(linesArray))
                    continue
                districtDictionary = {}
                try:
                    # Numeric first column means a stray OCR artefact row.
                    if is_number(linesArray[0].strip()):
                        print("--> Ignoring: {}".format(linesArray))
                        continue

                    districtDictionary['districtName'] = linesArray[0].strip().title()
                    districtDictionary['confirmed'] = int(linesArray[6])
                    districtDictionary['recovered'] = int(linesArray[9])
                    districtDictionary['deceased'] = int(linesArray[10])
                    districtArray.append(districtDictionary)
                except ValueError:
                    print("--> Ignoring: {}".format(linesArray))
                    continue

        deltaCalculator.getStateDataFromSite("Jammu and Kashmir", districtArray, option)
    except FileNotFoundError:
        # Was "rj.txt" — copy/paste slip; this reader uses jk.txt.
        print("jk.txt missing. Generate through pdf or ocr and rerun.")
def WBGetData():
    """Convert the West Bengal bulletin (via readFileFromURLV2) and report
    per-district counts from the generated .tmp/wb.csv."""
    districtArray = []
    readFileFromURLV2(metaDictionary['West Bengal'].url, "West Bengal", "Alipurduar", "TOTAL")
    try:
        with open(".tmp/wb.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                # Last column may be blank in the PDF; treat blank as 0.
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("West Bengal", districtArray, option)
    except FileNotFoundError:
        # Was "wb.txt" — this path reads the generated wb.csv.
        print("wb.csv missing. Generate through pdf or ocr and rerun.")

def PBGetDataThroughPdf():
    """Convert the Punjab bulletin (via readFileFromURLV2) and report
    per-district counts from the generated .tmp/pb.csv."""
    districtArray = []
    readFileFromURLV2(metaDictionary['Punjab'].url, "Punjab", "Ludhiana", "Total")
    try:
        with open(".tmp/pb.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[3])
                # Fix: the blank-column guard tested column 3 while the value
                # converted is column 4 — an off-by-one that crashed on a
                # blank deceased column.
                districtDictionary['deceased'] = int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Punjab", districtArray, option)
    except FileNotFoundError:
        # Was "pb.txt" — this path reads the generated pb.csv.
        print("pb.csv missing. Generate through pdf or ocr and rerun.")

def PBGetData():
    """Dispatch Punjab extraction based on the automation mode."""
    if typeOfAutomation == "pdf":
        PBGetDataThroughPdf()
    else:
        PBGetDataThroughOcr()

def PBGetDataThroughOcr():
    """Parse OCR output in .tmp/pb.txt into Punjab district counts."""
    districtArray = []
    try:
        with open(".tmp/pb.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')

                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                if linesArray[0].strip() == "Total":
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[3])
                districtDictionary['deceased'] = int(linesArray[4])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Punjab", districtArray, option)
    except FileNotFoundError:
        print("pb.txt missing. Generate through pdf or ocr and rerun.")
def KAGetData():
    """Dispatch Karnataka extraction based on the automation mode."""
    if typeOfAutomation == "ocr":
        KAGetDataByOCR()
    else:
        KAGetDataByUrl()


def KAGetDataByOCR():
    """Parse OCR output in .tmp/ka.txt into Karnataka district counts."""
    districtArray = []
    with open(".tmp/ka.txt") as kaFile:
        for line in kaFile:
            # Scrub OCR noise characters seen in the bulletin table.
            line = line.replace('"', '').replace('*', '').replace('#', '').replace('$', '')
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 9:
                print("--> Issue with {}".format(linesArray))
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[2])
            districtDictionary['recovered'] = int(linesArray[4])
            districtDictionary['deceased'] = int(linesArray[7]) if len(re.sub(r'\n', '', linesArray[7])) != 0 else 0
            districtArray.append(districtDictionary)
    deltaCalculator.getStateDataFromSite("Karnataka", districtArray, option)

def KAGetDataByUrl():
    """Download the Karnataka bulletin (optionally rewriting a Google Drive
    share link) and report district counts from .tmp/ka.csv; can also kick
    off the separate deceased-extraction flow."""
    global pdfUrl
    global pageId
    districtArray = []
    runDeceased = False
    startId = 0
    endId = 0

    # pageId may carry "page,start,end" — the extra ids drive the deceased
    # extractor run at the end.
    if ',' in pageId:
        startId = pageId.split(',')[1]
        endId = pageId.split(',')[2]
        pageId = pageId.split(',')[0]
        runDeceased = True

    if len(pdfUrl) != 0:
        # Rewrite a Google Drive "file/d/<id>" share link into a direct
        # download URL.
        # NOTE(review): if the link has no "file/d/<id>" part, fileId stays
        # unbound and the next line raises NameError — confirm inputs.
        urlArray = pdfUrl.split('/')
        for index, parts in enumerate(urlArray):
            if parts == "file":
                if urlArray[index + 1] == "d":
                    fileId = urlArray[index + 2]
                    break
        pdfUrl = "https://docs.google.com/uc?export=download&id=" + fileId
        print("--> Downloading using: {}".format(pdfUrl))
    readFileFromURLV2('', "Karnataka", "Bagalakote", "Total")
    try:
        with open(".tmp/ka.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Karnataka", districtArray, option)

        if runDeceased == True:
            os.system("python3 kaautomation.py d " + str(startId) + " " + str(endId) + " && cat kaconfirmed.csv")

    except FileNotFoundError:
        # Was "ka.txt" — this path reads the generated ka.csv.
        print("ka.csv missing. Generate through pdf or ocr and rerun.")

def HRGetData():
    """Report Haryana district counts from the bulletin (.tmp/hr.csv)."""
    districtArray = []
    if typeOfAutomation == "pdf":
        readFileFromURLV2(metaDictionary['Haryana'].url, "Haryana", "Gurugram", "Total")
    try:
        with open(".tmp/hr.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Haryana", districtArray, option)
    except FileNotFoundError:
        print("hr.csv missing. Generate through pdf or ocr and rerun.")

def TNGetData():
    """Report Tamil Nadu district counts from OCR text or the converted PDF."""
    if typeOfAutomation == "ocr":
        getTNDataThroughOcr()
        return
    else:
        convertTnPDFToCSV()
    districtArray = []
    try:
        with open(".tmp/tn.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                linesArray[3] = linesArray[3].replace('$', '')
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Tamil Nadu", districtArray, option)
    except FileNotFoundError:
        # Was "tn.txt" — this path reads the generated tn.csv.
        print("tn.csv missing. Generate through pdf or ocr and rerun.")
def getTNDataThroughOcr():
    """Parse OCR output in .tmp/tn.txt into Tamil Nadu district counts.

    All 'Airport' rows are folded into a single 'Airport Quarantine'
    pseudo-district; 'Railway' rows are renamed 'Railway Quarantine'.
    """
    districtArray = []
    airportDictionary = {'districtName': 'Airport Quarantine', "confirmed": 0, "recovered": 0, "deceased": 0}
    with open(".tmp/tn.txt") as tnFile:
        for line in tnFile:
            # Scrub OCR noise characters.
            line = line.replace('"', '').replace('*', '').replace('#', '').replace('$', '')
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 5:
                print("--> Issue with {}".format(linesArray))
                continue

            if 'Airport' in line:
                airportDictionary['confirmed'] += int(linesArray[1])
                airportDictionary['recovered'] += int(linesArray[2])
                airportDictionary['deceased'] += int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
                continue

            if 'Railway' in line:
                linesArray[0] = 'Railway Quarantine'

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[1])
            districtDictionary['recovered'] = int(linesArray[2])
            districtDictionary['deceased'] = int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
            districtArray.append(districtDictionary)

    districtArray.append(airportDictionary)
    deltaCalculator.getStateDataFromSite("Tamil Nadu", districtArray, option)



def NLGetData():
    """Parse OCR output in .tmp/nl.txt into Nagaland district counts."""
    districtArray = []
    if typeOfAutomation == "ocr":
        try:
            with open(".tmp/nl.txt", "r") as upFile:
                for line in upFile:
                    linesArray = line.split('|')[0].split(',')
                    if len(linesArray) != 13:
                        print("--> Issue with {}".format(linesArray))
                        continue

                    districtDictionary = {}
                    districtDictionary['districtName'] = linesArray[0].strip()
                    districtDictionary['confirmed'] = int(linesArray[12])
                    districtDictionary['recovered'] = int(linesArray[7])
                    districtDictionary['migrated'] = int(linesArray[11])
                    districtDictionary['deceased'] = int(linesArray[8]) if len(re.sub(r'\n', '', linesArray[8])) != 0 else 0
                    districtArray.append(districtDictionary)

            deltaCalculator.getStateDataFromSite("Nagaland", districtArray, option)
        except FileNotFoundError:
            # Was "hr.csv" — copy/paste slip; this reader uses nl.txt.
            print("nl.txt missing. Generate through pdf or ocr and rerun.")

def GAGetData():
    """Stub: dump the Goa dashboard tiles; district mapping is not
    implemented yet."""
    response = requests.request("GET", metaDictionary['Goa'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find_all("div", {"class": "vc_col-md-2"})

    districtArray = []
    for index, row in enumerate(table):
        print(row.get_text())

        districtDictionary = {}
        districtArray.append(districtDictionary)

    # Intentionally bails out before reporting: the tile text above is not
    # yet parsed into district counts.
    return
    deltaCalculator.getStateDataFromSite("Goa", districtArray, option)


def ASGetDataThroughOCR():
    """Print Assam per-district new-case counts from OCR output.

    Only districts with a positive count in the last column are printed.
    """
    try:
        with open(".tmp/as.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')
                if int(linesArray[len(linesArray) - 1]) > 0:
                    print("{},Assam,AS,{},Hospitalized".format(linesArray[0].strip(), linesArray[len(linesArray) - 1].strip()))

    except FileNotFoundError:
        # Was "pb.txt" — copy/paste slip; this reader uses as.txt.
        print("as.txt missing. Generate through pdf or ocr and rerun.")
Generate through pdf or ocr and rerun.") 1342 | 1343 | def ASGetData(): 1344 | if typeOfAutomation == "ocr": 1345 | ASGetDataThroughOCR() 1346 | return 1347 | response = requests.request("GET", metaDictionary['Assam'].url) 1348 | soup = BeautifulSoup(response.content, 'html.parser') 1349 | table = soup.find("tbody").find_all("tr") 1350 | 1351 | districtArray = [] 1352 | for index, row in enumerate(table): 1353 | dataPoints = row.find_all("td") 1354 | 1355 | districtDictionary = {} 1356 | districtDictionary['districtName'] = dataPoints[0].get_text().strip() 1357 | districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip()) if '-' not in dataPoints[1].get_text().strip() else 0 1358 | districtDictionary['recovered'] = int(dataPoints[3].get_text().strip()) if '-' not in dataPoints[3].get_text().strip() else 0 1359 | districtDictionary['deceased'] = int(dataPoints[4].get_text().strip()) if '-' not in dataPoints[4].get_text().strip() else 0 1360 | districtArray.append(districtDictionary) 1361 | 1362 | deltaCalculator.getStateDataFromSite("Assam", districtArray, option) 1363 | 1364 | def TRGetData(): 1365 | response = requests.request("GET", metaDictionary['Tripura'].url) 1366 | soup = BeautifulSoup(response.content, 'html.parser') 1367 | table = soup.find("tbody").find_all("tr") 1368 | 1369 | districtArray = [] 1370 | for index, row in enumerate(table): 1371 | dataPoints = row.find_all("td") 1372 | 1373 | districtDictionary = {} 1374 | districtDictionary['districtName'] = dataPoints[1].get_text().strip() 1375 | districtDictionary['confirmed'] = int(dataPoints[8].get_text().strip()) 1376 | districtDictionary['recovered'] = int(dataPoints[10].get_text().strip()) 1377 | districtDictionary['deceased'] = int(dataPoints[12].get_text().strip()) 1378 | districtArray.append(districtDictionary) 1379 | 1380 | deltaCalculator.getStateDataFromSite("Tripura", districtArray, option) 1381 | 1382 | def PYGetData(): 1383 | response = requests.request("GET", 
metaDictionary['Puducherry'].url) 1384 | soup = BeautifulSoup(response.content, 'html.parser') 1385 | table = soup.find_all("tbody")[1].find_all("tr") 1386 | 1387 | districtArray = [] 1388 | for index, row in enumerate(table): 1389 | dataPoints = row.find_all("td") 1390 | 1391 | districtDictionary = {} 1392 | districtDictionary['districtName'] = dataPoints[0].get_text().strip() 1393 | districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip()) 1394 | districtDictionary['recovered'] = int(dataPoints[2].get_text().strip()) 1395 | districtDictionary['deceased'] = int(dataPoints[4].get_text().strip()) 1396 | districtArray.append(districtDictionary) 1397 | 1398 | deltaCalculator.getStateDataFromSite("Puducherry", districtArray, option) 1399 | 1400 | def CHGetData(): 1401 | response = requests.request("GET", metaDictionary['Chandigarh'].url) 1402 | soup = BeautifulSoup(response.content, 'html.parser') 1403 | divs = soup.find("div", {"class": "col-lg-8 col-md-9 form-group pt-10"}).find_all("div", {"class": "col-md-3"}) 1404 | 1405 | districtDictionary = {} 1406 | districtArray = [] 1407 | districtDictionary['districtName'] = 'Chandigarh' 1408 | 1409 | for index, row in enumerate(divs): 1410 | 1411 | if index > 2: 1412 | continue 1413 | 1414 | dataPoints = row.find("div", {"class": "card-body"}).get_text() 1415 | 1416 | if index == 0: 1417 | districtDictionary['confirmed'] = int(dataPoints) 1418 | if index == 1: 1419 | districtDictionary['recovered'] = int(dataPoints) 1420 | if index == 2: 1421 | districtDictionary['deceased'] = int(dataPoints) 1422 | 1423 | districtArray.append(districtDictionary) 1424 | deltaCalculator.getStateDataFromSite("Chandigarh", districtArray, option) 1425 | 1426 | 1427 | def KLGetData(): 1428 | if typeOfAutomation == "pdf": 1429 | KLGetDataByPDF() 1430 | return 1431 | response = requests.request("GET", 'https://dashboard.kerala.gov.in/index.php') 1432 | sessionId = (response.headers['Set-Cookie']).split(';')[0].split('=')[1] 1433 | 
1434 | cookies = { 1435 | '_ga': 'GA1.3.594771251.1592531338', 1436 | '_gid': 'GA1.3.674470591.1592531338', 1437 | 'PHPSESSID': sessionId, 1438 | '_gat_gtag_UA_162482846_1': '1', 1439 | } 1440 | 1441 | headers = { 1442 | 'Connection': 'keep-alive', 1443 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 1444 | 'X-Requested-With': 'XMLHttpRequest', 1445 | 'Sec-Fetch-Site': 'same-origin', 1446 | 'Sec-Fetch-Mode': 'cors', 1447 | 'Sec-Fetch-Dest': 'empty', 1448 | 'Referer': 'https://dashboard.kerala.gov.in/index.php', 1449 | 'Accept-Language': 'en-US,en;q=0.9', 1450 | } 1451 | 1452 | stateDashboard = requests.get(metaDictionary['Kerala'].url, headers=headers, cookies=cookies).json() 1453 | districtArray = [] 1454 | for districtDetails in stateDashboard['features']: 1455 | districtDictionary = {} 1456 | districtDictionary['districtName'] = districtDetails['properties']['District'] 1457 | districtDictionary['confirmed'] = districtDetails['properties']['covid_stat'] 1458 | districtDictionary['recovered'] = districtDetails['properties']['covid_statcured'] 1459 | districtDictionary['deceased'] = districtDetails['properties']['covid_statdeath'] 1460 | districtArray.append(districtDictionary) 1461 | deltaCalculator.getStateDataFromSite("Kerala", districtArray, option) 1462 | 1463 | def KLGetDataByPDF(): 1464 | linesArray = [] 1465 | districtDictionary = {} 1466 | districtArray = [] 1467 | readFileFromURLV2(metaDictionary['Kerala'].url, "Kerala", "District", "Total") 1468 | try: 1469 | with open(".tmp/kl.csv", "r") as upFile: 1470 | for line in upFile: 1471 | linesArray = line.split(',') 1472 | if len(linesArray) != 3: 1473 | print("--> Issue with {}".format(linesArray)) 1474 | continue 1475 | print("{},Kerala,KL,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[1].strip())) 1476 | print("{},Kerala,KL,{},Recovered".format(linesArray[0].strip().title(), linesArray[2].strip())) 1477 | 1478 | upFile.close() 1479 | except FileNotFoundError: 1480 | 
print("ap.csv missing. Generate through pdf or ocr and rerun.") 1481 | 1482 | 1483 | def KLDGetData(): 1484 | linesArray = [] 1485 | districtDictionary = {} 1486 | districtArray = [] 1487 | readFileFromURLV2(metaDictionary['KeralaDeaths'].url, "KeralaDeaths", "District", "") 1488 | try: 1489 | with open(".tmp/kld.csv", "r") as upFile: 1490 | for line in upFile: 1491 | linesArray = line.split(',') 1492 | if len(linesArray) != 3: 1493 | print("--> Issue with {}".format(linesArray)) 1494 | continue 1495 | gender = "M" if linesArray[2].strip() == "Male" else "F" 1496 | print("{},{},,{},Kerala,KL,1,Deceased".format(linesArray[1], gender, linesArray[0].strip().title())) 1497 | 1498 | 1499 | upFile.close() 1500 | except FileNotFoundError: 1501 | print("ap.csv missing. Generate through pdf or ocr and rerun.") 1502 | 1503 | 1504 | def MLGetData(): 1505 | if typeOfAutomation == "ocr": 1506 | MLGetDataByOCR() 1507 | return 1508 | 1509 | #stateDashboard = requests.get(metaDictionary['Meghalaya'].url).json() 1510 | 1511 | response = requests.request("GET", "https://mbdasankalp.in/auth/local/embed") 1512 | authKey = json.loads(response.text)['key'] 1513 | 1514 | url = "https://mbdasankalp.in/api/elasticsearch/aggregation/or/db/merge?access_token=" + authKey 1515 | 1516 | payload = 
"{\"aggregation\":{\"XAxisHeaders\":[{\"TagId\":\"5dd151b22fc63e490ca55ad6\",\"Header\":false,\"dbId\":\"5f395a260deffa1bd752be4e\"}],\"IsXaxisParallel\":false,\"YAxisHeaders\":[{\"Operator\":\"COUNT_DISTINCT\",\"isHousehold\":true,\"Header\":false,\"dbId\":\"5f395a260deffa1bd752be4e\"}],\"IsYaxisParallel\":true,\"YAxisFormulae\":[{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"9100b461-5d86-47f9-b11c-6d48f90f9cf9\",\"isFormulaAxis\":true,\"formulaId\":\"5f395d6f0deffa1bd752bee8\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"5b94c49f-7c8e-4bdf-9c8b-e7af4e53e14d\",\"isFormulaAxis\":true,\"formulaId\":\"5f395dba0deffa1bd752bef2\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"3a36866c-956d-48b2-a47c-1149a0334f29\",\"isFormulaAxis\":true,\"formulaId\":\"5f395dd80deffa1bd752bef5\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"a714425e-e78f-4dd7-833a-636a3bb850ca\",\"isFormulaAxis\":true,\"formulaId\":\"5f395d9a0deffa1bd752beef\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]}]},\"dbId\":\"5f395a260deffa1bd752be4e\",\"tagFilters\":[],\"sorting\":{\"axis\":{\"id\":\"5f395d6f0deffa1bd752bee8\",\"axisId\":\"9100b461-5d86-47f9-b11c-6d48f90f9cf9\",\"operator\":\"rowcount\"},\"sort\":{\"orderBy\":\"count\",\"order\":\"desc\"},\"size\":9999,\"enabled\":true,\"histogram\":false,\"timeseries\":false},\"customBins\":[],\"tagStatus\":true,\"boxplot\":false,\"requestedDbs\":{\"5f395a260deffa1bd752be4e\":{}}}" 1517 | headers = { 1518 | 'Origin': 'https://mbdasankalp.in', 1519 | 'Referer': 'https://mbdasankalp.in/render/chart/5f4a8e961dbba63b625ff002?c=f7f7f7&bc=121212&key=' + authKey, 1520 | 'Host': 'mbdasankalp.in', 1521 | 'Content-Type': 'application/json', 1522 | 'Accept': 'application/json, text/plain, */*', 1523 | 'Content-Length': '1399' 1524 | } 1525 | 1526 | response = requests.request("POST", url, headers=headers, data = 
payload) 1527 | stateDashboard = json.loads(response.text.encode('utf8')) 1528 | 1529 | districtArray = [] 1530 | for data in stateDashboard[0]: 1531 | districtDictionary = {} 1532 | districtDictionary['districtName'] = data["name"] 1533 | for value in data["value"]: 1534 | try: 1535 | if value["formulaId"] == "5f395d6f0deffa1bd752bee8": 1536 | districtDictionary['confirmed'] = int(value["value"]) 1537 | if value["formulaId"] == "5f395dba0deffa1bd752bef2": 1538 | districtDictionary['recovered'] = int(value["value"]) 1539 | if value["formulaId"] == "5f395dd80deffa1bd752bef5": 1540 | districtDictionary['deceased'] = int(value["value"]) 1541 | except KeyError: 1542 | continue 1543 | districtArray.append(districtDictionary) 1544 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1545 | return 1546 | 1547 | districtArray = [] 1548 | for districtDetails in stateDashboard['features']: 1549 | districtDictionary = {} 1550 | districtDictionary['districtName'] = districtDetails['attributes']['Name'] 1551 | districtDictionary['confirmed'] = districtDetails['attributes']['Positive'] 1552 | districtDictionary['recovered'] = districtDetails['attributes']['Recovered'] 1553 | districtDictionary['deceased'] = districtDetails['attributes']['Deceasesd'] 1554 | districtArray.append(districtDictionary) 1555 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1556 | 1557 | def MLGetDataByOCR(): 1558 | districtArray = [] 1559 | with open(".tmp/ml.txt", "r") as mlFile: 1560 | for line in mlFile: 1561 | linesArray = line.split('|')[0].split(',') 1562 | if len(linesArray) != 8: 1563 | print("--> Issue with {}".format(linesArray)) 1564 | continue 1565 | 1566 | districtDictionary = {} 1567 | districtDictionary['districtName'] = linesArray[0].strip() 1568 | districtDictionary['confirmed'] = int(linesArray[5].strip()) 1569 | districtDictionary['recovered'] = int(linesArray[6].strip()) 1570 | districtDictionary['deceased'] = int(linesArray[7]) if 
len(re.sub('\n', '', linesArray[7])) != 0 else 0 1571 | districtArray.append(districtDictionary) 1572 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1573 | 1574 | def MNGetData(): 1575 | districtArray = [] 1576 | with open(".tmp/mn.txt") as mnFile: 1577 | for line in mnFile: 1578 | linesArray = line.split('|')[0].split(',') 1579 | if len(linesArray) != 8: 1580 | print("--> Issue with {}".format(linesArray)) 1581 | continue 1582 | 1583 | if (linesArray[2].strip()) != "0": 1584 | print("{},Manipur,MN,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[2].strip())) 1585 | if (linesArray[4].strip()) != "0": 1586 | print("{},Manipur,MN,{},Deceased".format(linesArray[0].strip().title(), linesArray[4].strip())) 1587 | 1588 | mnFile.close() 1589 | 1590 | def MZGetData(): 1591 | districtArray = [] 1592 | with open(".tmp/mz.txt") as mzFile: 1593 | for line in mzFile: 1594 | line = line.replace('Nil', '0') 1595 | linesArray = line.split('|')[0].split(',') 1596 | if len(linesArray) != 5: 1597 | print("--> Issue with {}".format(linesArray)) 1598 | continue 1599 | 1600 | districtDictionary = {} 1601 | districtDictionary['districtName'] = linesArray[0].strip() 1602 | districtDictionary['confirmed'] = int(linesArray[4]) #+ int(linesArray[2]) + int(linesArray[3]) 1603 | districtDictionary['recovered'] = int(linesArray[2]) 1604 | districtDictionary['deceased'] = int(linesArray[3]) #if len(re.sub('\n', '', linesArray[3])) != 0 else 0 1605 | districtArray.append(districtDictionary) 1606 | 1607 | mzFile.close() 1608 | deltaCalculator.getStateDataFromSite("Mizoram", districtArray, option) 1609 | 1610 | 1611 | def LAGetData(): 1612 | response = requests.request("GET", metaDictionary['Ladakh'].url) 1613 | soup = BeautifulSoup(response.content, 'html.parser') 1614 | table = soup.find("table", id = "tableCovidData2").find_all("tr") 1615 | 1616 | districtArray = [] 1617 | districtDictionary = {} 1618 | confirmed = table[9].find_all("td")[1] 1619 | 
discharged = table[11].find_all("td")[1] 1620 | confirmedArray = dischargedArray = [] 1621 | confirmedArray = re.sub('\\r', '', re.sub(':', '', re.sub(' +', ' ', re.sub("\n", " ", confirmed.get_text().strip())))).split(' ') 1622 | dischargedArray = re.sub('\\r', '', re.sub(':', '', re.sub(' +', ' ', re.sub("\n", " ", discharged.get_text().strip())))).split(' ') 1623 | 1624 | districtDictionary['districtName'] = confirmedArray[0] 1625 | districtDictionary['confirmed'] = int(confirmedArray[1]) 1626 | districtDictionary['recovered'] = int(dischargedArray[1]) 1627 | districtDictionary['deceased'] = -999 1628 | districtArray.append(districtDictionary) 1629 | 1630 | districtDictionary = {} 1631 | districtDictionary['districtName'] = confirmedArray[2] 1632 | districtDictionary['confirmed'] = int(confirmedArray[3]) 1633 | districtDictionary['recovered'] = int(dischargedArray[3]) 1634 | districtDictionary['deceased'] = -999 1635 | districtArray.append(districtDictionary) 1636 | 1637 | deltaCalculator.getStateDataFromSite("Ladakh", districtArray, option) 1638 | 1639 | def VCMFormatLine(row): 1640 | 1641 | state = " " 1642 | firstDose = 0 1643 | secondDose = 0 1644 | totalDose = 0 1645 | 1646 | if len(row) < 5: 1647 | row = re.sub("\s+", " ", " ".join(row)).split(" ") 1648 | state = row[1] 1649 | firstDose = re.sub(",", "", row[2]) 1650 | secondDose = re.sub(",", "", row[3]) 1651 | totalDose = re.sub(",", "", row[4]) 1652 | 1653 | return state + "," + firstDose + "," + secondDose + "," + totalDose + "\n" 1654 | #return row[1] + "," + re.sub(",", "", row[2]) + "," + re.sub(",", "", row[3]) + "," + re.sub(",", "", row[4]) + "\n" 1655 | 1656 | def PBFormatLine(row): 1657 | return row[1] + "," + row[2] + "," + row[3] + "," + row[4] + "," + row[5] + "\n" 1658 | 1659 | def KLFormatLine(row): 1660 | return row[0] + "," + row[1] + "," + row[2] + "\n" 1661 | 1662 | def KLDFormatLine(row): 1663 | return row[1] + "," + row[4] + "," + row[5] + "\n" 1664 | 1665 | 1666 | def 
KAFormatLine(row): 1667 | district = "" 1668 | modifiedRow = [] 1669 | for value in row: 1670 | if len(value) > 0: 1671 | modifiedRow.append(value) 1672 | 1673 | if is_number(modifiedRow[0]) == False: 1674 | district = " ".join(re.sub(' +', ' ', modifiedRow[0]).split(' ')[1:]) 1675 | modifiedRow.insert(0, 'a') 1676 | else: 1677 | district = re.sub('\*', '', modifiedRow[1]) 1678 | print(modifiedRow) 1679 | 1680 | return district + "," + modifiedRow[3] + "," + modifiedRow[5] + "," + modifiedRow[8] + "\n" 1681 | 1682 | """ 1683 | def HRFormatLine(line): 1684 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 1685 | 1686 | linesArray = line.split(',') 1687 | 1688 | if len(linesArray) > 1 and linesArray[1] == "Charkhi": 1689 | linesArray.remove("Dadri") 1690 | linesArray[1] = "Charkhi Dadri" 1691 | 1692 | if len(linesArray) != 11: 1693 | print("--> Ignoring: {}".format(linesArray)) 1694 | return "\n" 1695 | 1696 | recovery = 0 1697 | if '[' in linesArray[4]: 1698 | recovery = linesArray[4].split('[')[0] 1699 | else: 1700 | recovery = linesArray[4] 1701 | 1702 | deaths = 0 1703 | if '[' in linesArray[7]: 1704 | deaths = linesArray[7].split('[')[0] 1705 | else: 1706 | deaths = linesArray[7] 1707 | 1708 | outputString = linesArray[1] + "," + linesArray[3] + "," + str(recovery) + "," + str(deaths) + "\n" 1709 | return outputString 1710 | """ 1711 | 1712 | def HRFormatLine(row): 1713 | row[1] = re.sub('\*', '', row[1]) 1714 | if '[' in row[3]: 1715 | row[3] = row[3].split('[')[0] 1716 | if '[' in row[4]: 1717 | row[4] = row[4].split('[')[0] 1718 | if '[' in row[7]: 1719 | row[7] = row[7].split('[')[0] 1720 | if '[' in row[6]: 1721 | row[6] = row[6].split('[')[0] 1722 | 1723 | line = row[1] + "," + row[3] + "," + row[4] + "," + str(int(row[6]) + int (row[7])) + "\n" 1724 | return line 1725 | 1726 | def APFormatLine(row): 1727 | line = row[1] + "," + row[3] + "," + row[5] + "," + row[6] + "\n" 1728 | return line 1729 | 1730 | 1731 | def WBFormatLine(row): 1732 | row[2] = 
re.sub(',', '', re.sub('\+.*', '', row[2])) 1733 | row[3] = re.sub(',', '', re.sub('\+.*', '', row[3])) 1734 | row[4] = re.sub('\#', '', re.sub(',', '', re.sub('\+.*', '', row[4]))) 1735 | row[5] = re.sub(',', '', re.sub('\+.*', '', row[5])) 1736 | line = row[1] + "," + row[2] + "," + row[3] + "," + row[4] + "\n" 1737 | return line 1738 | 1739 | ''' 1740 | This method uses camelot package to read a pdf and then parse it into a csv file. 1741 | In this method, we read the pdf either from the meta file or from the pdfUrl global variable. This variable can be set from the cmd line. 1742 | The method also takes user input for page number or allows for page number to be used from the pageId global variable. 1743 | The method, reads a specific page, then for that page, decides if a line has to be ignored using starting and ending keys. 1744 | Then the method calls a "FormatLine(row)" function that calls the corresponding function to allow for any row/line to be manipulated. 1745 | The outputs are written to a .csv file. This is read inside the corresponding GetData() functions which call deltaCalculator to calculate deltas. 
1746 | ''' 1747 | def readFileFromURLV2(url, stateName, startKey, endKey): 1748 | global pdfUrl 1749 | global pageId 1750 | stateFileName = metaDictionary[stateName].stateCode 1751 | 1752 | if len(pdfUrl) > 0: 1753 | url = pdfUrl 1754 | if len(url) > 0: 1755 | #print("--> Requesting download from {} ".format(url)) 1756 | r = requests.get(url, allow_redirects=True, verify=False) 1757 | open(".tmp/" + stateFileName + ".pdf", 'wb').write(r.content) 1758 | if len(pageId) > 0: 1759 | pid = "" 1760 | if ',' in pageId: 1761 | startPage = int(pageId.split(',')[0]) 1762 | endPage = int(pageId.split(',')[1]) 1763 | for pages in range(startPage, endPage + 1, 1): 1764 | print(pages) 1765 | pid = pid + "," + str(pages) if len(pid) > 0 else str(pages) 1766 | print(pid) 1767 | else: 1768 | pid = pageId 1769 | else: 1770 | pid = input("Enter district page:") 1771 | print("Running for {} pages".format(pid)) 1772 | tables = camelot.read_pdf(".tmp/" + stateFileName + ".pdf", strip_text = '\n', pages = pid, split_text = True) 1773 | for index, table in enumerate(tables): 1774 | tables[index].to_csv('.tmp/' + stateFileName + str(index) + '.pdf.txt') 1775 | 1776 | stateOutputFile = open('.tmp/' + stateFileName.lower() + '.csv', 'w') 1777 | csvWriter = csv.writer(stateOutputFile) 1778 | arrayToWrite = [] 1779 | 1780 | startedReadingDistricts = False 1781 | for index, table in enumerate(tables): 1782 | with open('.tmp/' + stateFileName + str(index) + '.pdf.txt', newline='') as stateCSVFile: 1783 | rowReader = csv.reader(stateCSVFile, delimiter=',', quotechar='"') 1784 | for row in rowReader: 1785 | line = "|".join(row) 1786 | line = re.sub("\|+", '|', line) 1787 | if startKey in line: 1788 | startedReadingDistricts = True 1789 | if len(endKey) > 0 and endKey in line: 1790 | startedReadingDistricts = False 1791 | continue 1792 | if startedReadingDistricts == False: 1793 | continue 1794 | 1795 | line = eval(stateFileName + "FormatLine")(line.split('|')) 1796 | if line == "\n": 1797 | 
continue 1798 | print(line, file = stateOutputFile, end = "") 1799 | 1800 | stateOutputFile.close() 1801 | 1802 | ''' This will be deprecated. ''' 1803 | def readFileFromURL(url, stateName, startKey, endKey): 1804 | global pdfUrl 1805 | global pageId 1806 | stateFileName = metaDictionary[stateName].stateCode 1807 | if len(pdfUrl) > 0: 1808 | url = pdfUrl 1809 | 1810 | if len(url) > 0: 1811 | r = requests.get(url, allow_redirects=True, verify=False) 1812 | open(".tmp/" + stateFileName + ".pdf", 'wb').write(r.content) 1813 | 1814 | with open(".tmp/" + stateFileName + ".pdf", "rb") as f: 1815 | pdf = pdftotext.PDF(f) 1816 | 1817 | fileToWrite = open(".tmp/" + stateFileName + ".pdf.txt", "w") 1818 | if len(pageId) > 0: 1819 | pid = pageId 1820 | else: 1821 | pid = input("Enter district page:") 1822 | print(pdf[int(pid)], file = fileToWrite) 1823 | fileToWrite.close() 1824 | 1825 | fileToWrite = open(".tmp/" + stateFileName + '.pdf.txt', 'r') 1826 | lines = fileToWrite.readlines() 1827 | stateOutputFileName = open(".tmp/" + stateFileName + '.txt', 'w') 1828 | 1829 | startedReadingDistricts = False 1830 | outputLines = [] 1831 | for line in lines: 1832 | if startKey in line: 1833 | startedReadingDistricts = True 1834 | if endKey in line: 1835 | startedReadingDistricts = False 1836 | continue 1837 | 1838 | if startedReadingDistricts == False: 1839 | continue 1840 | print(eval(stateFileName + "FormatLine")(line), file = stateOutputFileName, end = " ") 1841 | 1842 | stateOutputFileName.close() 1843 | fileToWrite.close() 1844 | 1845 | '''This will eventually be moved to TNFormatLine(row) function''' 1846 | def convertTnPDFToCSV(): 1847 | global pdfUrl 1848 | global typeOfAutomation 1849 | 1850 | if len(pdfUrl) > 0: 1851 | r = requests.get(pdfUrl, allow_redirects=True, verify=False) 1852 | open(".tmp/tn.pdf", 'wb').write(r.content) 1853 | 1854 | try: 1855 | with open(".tmp/" + "tn.pdf", "rb") as f: 1856 | pdf = pdftotext.PDF(f) 1857 | except FileNotFoundError: 1858 | 
print("Make sure tn.pdf is present in the current folder and rerun the script! Arigatou gozaimasu.") 1859 | return 1860 | 1861 | tables = camelot.read_pdf('.tmp/tn.pdf',strip_text='\n', pages="7", split_text = True) 1862 | tables[0].to_csv('.tmp/tn.pdf.txt') 1863 | 1864 | tnFile = open(".tmp/" + 'tn.pdf.txt', 'r') 1865 | lines = tnFile.readlines() 1866 | tnOutputFile = open(".tmp/" + 'tn.csv', 'w') 1867 | 1868 | startedReadingDistricts = False 1869 | airportRun = 1 1870 | airportConfirmedCount = 0 1871 | airportRecoveredCount = 0 1872 | airportDeceasedCount = 0 1873 | with open('.tmp/tn.pdf.txt', newline='') as csvfile: 1874 | rowReader = csv.reader(csvfile, delimiter=',', quotechar='"') 1875 | line = "" 1876 | for row in rowReader: 1877 | line = '|'.join(row) 1878 | 1879 | if 'Ariyalur' in line: 1880 | startedReadingDistricts = True 1881 | if 'Total' in line: 1882 | startedReadingDistricts = False 1883 | 1884 | if startedReadingDistricts == False: 1885 | continue 1886 | 1887 | line = line.replace('"', '').replace('*', '').replace('#', '').replace(',', '').replace('$', '') 1888 | linesArray = line.split('|') 1889 | 1890 | if len(linesArray) < 6: 1891 | print("--> Ignoring line: {} due to less columns".format(line)) 1892 | continue 1893 | 1894 | if 'Airport' in line: 1895 | airportConfirmedCount += int(linesArray[2]) 1896 | airportRecoveredCount += int(linesArray[3]) 1897 | airportDeceasedCount += int(linesArray[5]) 1898 | if airportRun == 1: 1899 | airportRun += 1 1900 | continue 1901 | else: 1902 | print("{}, {}, {}, {}\n".format('Airport Quarantine', airportConfirmedCount, airportRecoveredCount, airportDeceasedCount), file = tnOutputFile) 1903 | continue 1904 | if 'Railway' in line: 1905 | print("{}, {}, {}, {}".format('Railway Quarantine', linesArray[2], linesArray[3], linesArray[5]), file = tnOutputFile) 1906 | continue 1907 | 1908 | print("{}, {}, {}, {}".format(linesArray[1], linesArray[2], linesArray[3], linesArray[5]), file = tnOutputFile) 1909 | 1910 | 
tnOutputFile.close() 1911 | 1912 | def is_number(s): 1913 | try: 1914 | int(s) 1915 | return True 1916 | except ValueError: 1917 | return False 1918 | 1919 | def main(): 1920 | 1921 | loadMetaData() 1922 | stateName = "" 1923 | global option 1924 | global typeOfAutomation 1925 | global pdfUrl 1926 | global pageId 1927 | 1928 | if len(sys.argv) not in [1, 2, 3, 4]: 1929 | print('Usage: ./automation "[StateName]" "[detailed/full]" "[ocr/pdf=url]"') 1930 | return 1931 | 1932 | if len(sys.argv) == 2: 1933 | stateName = sys.argv[1] 1934 | 1935 | if len(sys.argv) == 3: 1936 | stateName = sys.argv[1] 1937 | option = sys.argv[2] 1938 | 1939 | if len(sys.argv) == 4: 1940 | stateName = sys.argv[1] 1941 | option = sys.argv[2] 1942 | if "=" in sys.argv[3]: 1943 | typeOfAutomation = sys.argv[3].split("=")[0] 1944 | pdfUrl = sys.argv[3].split("=")[1] 1945 | if len(sys.argv[3].split("=")) > 2: 1946 | pageId = sys.argv[3].split("=")[2] 1947 | else: 1948 | typeOfAutomation = sys.argv[3] 1949 | 1950 | print("Using pageId: {}".format(pageId)) 1951 | 1952 | if not stateName: 1953 | stateName = "All States" 1954 | fetchData(stateName) 1955 | 1956 | if __name__ == '__main__': 1957 | main() 1958 | 1959 | --------------------------------------------------------------------------------